#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"

#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;

#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
kmp_info_t __kmp_monitor;

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);
#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
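/* Determine the global thread id (gtid) of the calling thread. Depending on
   __kmp_gtid_mode this reads the __declspec(thread)/TLS value, the keyed
   thread-specific value, or falls back to a stack-address search over all
   registered threads; KMP_GTID_DNE is used when gtids are not available. */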
int __kmp_get_global_thread_id() {
  kmp_info_t **other_threads;

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
            __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));

  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();

  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
int __kmp_get_global_thread_id_reg() {
  if (!__kmp_init_serial) {

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));

  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();

    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();

  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KMP_DEBUG_ASSERT(gtid >= 0);
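/* Check whether the stack of the given thread overlaps the stack of any
   other registered thread; with extensive environment checks enabled an
   overlap is a fatal error (StackOverlap). */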
void __kmp_check_stack_overlap(kmp_info_t *th) {
  char *stack_beg = NULL;
  char *stack_end = NULL;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);

  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));

void __kmp_infinite_loop(void) {
  static int done = FALSE;

#define MAX_MESSAGE 512
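/* Print a single "OMP storage map:" line for the address range [p1,p2] while
   holding the stdio bootstrap lock; when KMP_PRINT_DATA_PLACEMENT is enabled
   it additionally reports which memory node backs each page of the range. */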
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);

      __kmp_storage_map_verbose = FALSE;

      int localProc = __kmp_get_cpu_from_gtid(gtid);

      const int page_size = KMP_GET_PAGE_SIZE();

      p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
      p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));

        __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                             localProc >> 1);
        __kmp_printf_no_lock("  GTID %d\n", gtid);

          (char *)p1 += page_size;
        } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
        __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                             lastNode);

      __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                           (char *)p1 + (page_size - 1),
                           __kmp_get_host_node(p1));
      __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                           (char *)p2 + (page_size - 1),
                           __kmp_get_host_node(p2));

      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));

  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
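/* Emit an "OMP warning:" message on stderr unless warnings are disabled. */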
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];

  if (__kmp_generate_warnings == kmp_warnings_off) {

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
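/* Abort the entire process; dump the debug buffer first if it is in use. */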
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();

  if (KMP_OS_WINDOWS) {
    __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();
  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
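/* Print storage-map entries for the pieces of a thread descriptor
   (kmp_info_t): the descriptor itself, its info and local data, and the
   per-barrier state. */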
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();

static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
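/* Entry point when the runtime is built as a Windows DLL: shut the library
   (or just the current thread's part of it) down on process/thread detach. */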
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
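/* Ordered-section entry ("deo") and exit ("dxo") hooks used by the loop
   dispatcher; with BUILD_PARALLEL_ORDERED they serialize iterations by
   waiting on and then advancing the team's t_ordered counter. */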
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif

#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);

#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
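/* Returns TRUE for the thread that wins the race to execute the single
   construct; optionally pushes/checks the workshare on the consistency
   stack. */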
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {

    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;

    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);

    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      __kmp_itt_metadata_single(id_ref);

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);

    __kmp_itt_single_start(gtid);

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
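/* Decide how many threads the upcoming parallel region may actually use:
   apply the dynamic-adjustment modes (load balance, thread limit, random),
   the device and contention-group thread limits, and the capacity of the
   __kmp_threads array, then return the reserved count (possibly 1, which
   means the region will be serialized). */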
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {

#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));

    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));

  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));

    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));

      new_nthreads = set_nthreads;

  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));

      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);

    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));

    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);

    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));

    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {

  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {

      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                " %d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
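/* Wire up the primary thread as slot 0 of the new team, then allocate or
   reuse workers for the remaining slots, propagating barrier state and
   teams-construct information to each of them. */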
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;

  int level = team->t.t_active_level - 1;
  if (master_th->th.th_teams_microtask) {
    if (master_th->th.th_teams_size.nteams > 1) {

    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        master_th->th.th_teams_level == team->t.t_level) {

  if (level < __kmp_hot_teams_max_level) {
    if (hot_teams[level].hot_team) {
      KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);

      hot_teams[level].hot_team = team;
      hot_teams[level].hot_team_nth = team->t.t_nproc;

  use_hot_team = team == root->r.r_hot_team;

  team->t.t_threads[0] = master_th;
  __kmp_initialize_info(master_th, team, 0, master_gtid);

  for (i = 1; i < team->t.t_nproc; i++) {

    kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
    team->t.t_threads[i] = thr;
    KMP_DEBUG_ASSERT(thr);
    KMP_DEBUG_ASSERT(thr->th.th_team == team);

    KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                  "T#%d(%d:%d) join =%llu, plain=%llu\n",
                  __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                  __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                  team->t.t_bar[bs_plain_barrier].b_arrived));
    thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
    thr->th.th_teams_level = master_th->th.th_teams_level;
    thr->th.th_teams_size = master_th->th.th_teams_size;

    kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b) {
      balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

      balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;

#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(team);

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
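// propagateFPControl() captures the primary thread's x87 FPU control word and
// MXCSR into the team so workers can adopt them; updateHWFPControl() restores
// them on a thread whose registers have drifted. Both collapse to no-ops on
// non-x86 architectures.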
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {

    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);

#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc);
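/* Run a parallel region serially on the encountering thread: reuse (or
   allocate) the thread's serial team, push one nesting level, set up a
   dispatch buffer, and do the OMPT bookkeeping for an implicit task of size
   one. */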
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;

  this_thr->th.th_set_proc_bind = proc_bind_default;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);

  if (this_thr->th.th_team != serial_team) {

    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {

      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));

    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];

    serial_team->t.t_pkfn = (microtask_t)(~0);
    this_thr->th.th_info.ds.ds_tid = 0;

    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));

    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    int level = this_thr->th.th_team->t.t_level;

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];

    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);

    dispatch_private_info_t *disp_buffer =
        (dispatch_private_info_t *)__kmp_allocate(
            sizeof(dispatch_private_info_t));
    disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {

      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);

    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);

    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
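/* Most of the work of forking a team for a parallel region: handle the
   teams-construct and serialized cases, reserve threads, allocate or reuse a
   (hot) team, propagate ICVs and task state, and release the workers via
   __kmp_internal_fork() before the primary thread invokes the microtask. */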
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  int master_this_cons;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  int master_set_numthreads;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {

    void *dummy = KMP_ALLOCA(__kmp_stkpadding);

    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);

  __kmp_assign_root_init_mask();

  level = parent_team->t.t_level;

  active_level = parent_team->t.t_active_level;

  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;

    (*p_hot_teams)[0].hot_team_nth = 1;

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);

    master_th->th.ompt_thread_info.state = ompt_state_overhead;

  master_th->th.th_ident = loc;
  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {

    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);

    if (parent_team == master_th->th.th_serial_team) {

      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

      if (call_context == fork_context_gnu) {

        parent_team->t.t_serialized--;

      parent_team->t.t_pkfn = microtask;

      void **exit_frame_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      parent_team->t.t_serialized--;

      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context) | ompt_parallel_team,
              return_address);

        master_th->th.ompt_thread_info.state = ompt_state_overhead;

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lw_taskteam;
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);

    if (master_set_numthreads) {
      if (master_set_numthreads <= master_th->th.th_teams_size.nth) {

        kmp_info_t **other_threads = parent_team->t.t_threads;

        int old_proc = master_th->th.th_teams_size.nth;
        if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
            bp_dist_bar) {
          __kmp_resize_dist_barrier(parent_team, old_proc,
                                    master_set_numthreads);
          __kmp_add_threads_to_team(parent_team, master_set_numthreads);
        }
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;

      master_th->th.th_set_nproc = 0;

    if (__kmp_debugging) {
      int nth = __kmp_omp_num_threads(loc);
        master_set_numthreads = nth;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
         KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3 &&
        parent_team->t.t_active_level == 1
        && master_th->th.th_teams_size.nteams == 1) {
      kmp_uint64 tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      parent_team->t.t_region_time = tmp_time;

    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);

      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    if (call_context == fork_context_gnu)

    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);

  int enter_teams = 0;
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {

  enter_teams = ((ap == NULL && active_level == 0) ||
                 (ap && teams_level > 0 && teams_level == level));
  nthreads = master_set_numthreads
                 ? master_set_numthreads
                 : get__nproc_2(parent_team, master_tid);

  if ((get__max_active_levels(master_th) == 1 &&
       (root->r.r_in_parallel && !enter_teams)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                  " threads\n",
                  gtid, nthreads));

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                     nthreads, enter_teams);
    if (nthreads == 1) {

      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  KMP_DEBUG_ASSERT(nthreads > 0);

  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    master_th->th.th_serial_team->t.t_pkfn = microtask;

    if (call_context == fork_context_intel) {

      master_th->th.th_serial_team->t.t_ident = loc;

      master_th->th.th_serial_team->t.t_level--;

      void **exit_frame_p;
      ompt_task_info_t *task_info;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num,
              ompt_task_implicit);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc,
                             parent_team->t.t_argv

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num,
              ompt_task_implicit);

        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team,
              return_address);

        master_th->th.ompt_thread_info.state = ompt_state_overhead;
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team ==
                       master_th->th.th_serial_team);
      team = master_th->th.th_team;

      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      if (ap) {
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
      } else {
        for (i = 0; i < argc; ++i)
          argv[i] = parent_team->t.t_argv[i];
      }

      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);

        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              return_address);

        master_th->th.ompt_thread_info.state = ompt_state_overhead;

      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);

      void **exit_frame_p;
      ompt_task_info_t *task_info;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, args

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num,
              ompt_task_implicit);

        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team,
              return_address);

        master_th->th.ompt_thread_info.state = ompt_state_overhead;

    } else if (call_context == fork_context_gnu) {

      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);

      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));

      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));

  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));

  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {

    KMP_ATOMIC_INC(&root->r.r_in_parallel);

  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;

    if (proc_bind == proc_bind_default) {

      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;

    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);

  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);

  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);

  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }

  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);

  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  propagateFPControl(team);

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();

  if (__kmp_tasking_mode != tskm_immediate_exec) {

    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {

      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) {
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;

        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;

        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);

      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {

        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];

      master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS

#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));

  KA_TRACE(20,
           ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
            gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
            team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
    }
  } else {
    for (i = 0; i < argc; ++i) {
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active)
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  if (team->t.t_active_level == 1
      && !master_th->th.th_teams_microtask) {

    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();

      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;

    if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {

      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);

  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);

  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {

    KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
    team->t.t_stack_id = __kmp_itt_stack_caller_create();
  } else if (parent_team->t.t_serialized) {

    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED

  KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED

  KMP_SET_THREAD_STATE(previous_state);

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
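/* Helpers used on the join path when OMPT is enabled: restore the thread
   state and emit the parallel-end callback. */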
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
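/* Join at the end of a parallel region: wait for the workers in
   __kmp_internal_join(), unwind the teams-construct case if needed, restore
   the primary thread's state and the parent team, and emit the ITT/OMPT
   join events. */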
2279 void __kmp_join_call(
ident_t *loc,
int gtid
2282 enum fork_context_e fork_context
2286 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2288 kmp_team_t *parent_team;
2289 kmp_info_t *master_th;
2293 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2296 master_th = __kmp_threads[gtid];
2297 root = master_th->th.th_root;
2298 team = master_th->th.th_team;
2299 parent_team = team->t.t_parent;
2301 master_th->th.th_ident = loc;
2304 void *team_microtask = (
void *)team->t.t_pkfn;
2308 if (ompt_enabled.enabled &&
2309 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2310 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2315 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2316 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2317 "th_task_team = %p\n",
2318 __kmp_gtid_from_thread(master_th), team,
2319 team->t.t_task_team[master_th->th.th_task_state],
2320 master_th->th.th_task_team));
2321 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2322 team->t.t_task_team[master_th->th.th_task_state]);
2326 if (team->t.t_serialized) {
2327 if (master_th->th.th_teams_microtask) {
2329 int level = team->t.t_level;
2330 int tlevel = master_th->th.th_teams_level;
2331 if (level == tlevel) {
2335 }
else if (level == tlevel + 1) {
2339 team->t.t_serialized++;
2345 if (ompt_enabled.enabled) {
2346 __kmp_join_restore_state(master_th, parent_team);
2353 master_active = team->t.t_master_active;
2358 __kmp_internal_join(loc, gtid, team);
2360 if (__itt_stack_caller_create_ptr) {
2361 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2363 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2364 team->t.t_stack_id = NULL;
2368 master_th->th.th_task_state =
2371 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2372 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2376 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2377 parent_team->t.t_stack_id = NULL;
2381 if (team->t.t_nproc > 1 &&
2382 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2383 team->t.b->update_num_threads(team->t.t_nproc);
2384 __kmp_add_threads_to_team(team, team->t.t_nproc);
#if OMPT_SUPPORT
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || /* not in teams construct */
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // Only one notification scheme (either "submit" or "forking/joined",
    // not both).
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */
  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
// We leave the team structure intact at the end of a parallel inside the
// teams construct, so that the same (hot) team works at the next parallel;
// only adjust nesting levels here.
#if OMPT_SUPPORT
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }
#endif
    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore the number of threads in the team if needed.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust the states of the previously unused threads of the team.
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize the thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize the thread's task state with the primary thread.
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }
#endif

    return;
  }
  /* Do cleanup and restore the parent team. */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* This lock has REL and ACQ semantics, separating the parallel user code
     called in this region from the serial user code that runs after this
     function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }
#endif
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

#if KMP_AFFINITY_SUPPORTED
  // Restore the primary thread's partition.
  master_th->th.th_first_place = team->t.t_first_place;
  master_th->th.th_last_place = team->t.t_last_place;
#endif // KMP_AFFINITY_SUPPORTED
  master_th->th.th_def_allocator = team->t.t_def_allocator;

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_end();
#endif

  updateHWFPControl(team);
  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root,
                  team USE_NESTED_HOT_ARG(master_th)); // free worker threads

  /* The following restore must stay inside the fork/join critical region:
     the old team may be reallocated concurrently, and the hierarchy would
     otherwise appear inconsistent to the debug assertions. */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* Restore the serialized team, if need be. */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top > 0) {
      // Restore the task state from the memo stack.
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember the primary thread's state in case this team is reused.
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore the state at this level.
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the primary thread.
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }
#endif

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record onto the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
      copy_icvs(control, &thread->th.th_current_task->td_icvs);
      control->serial_nesting_level = thread->th.th_team->t.t_serialized;
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
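/* Change the nthreads-var ICV of the calling thread. If no parallel region is
   active and the root's hot team is larger than the new value, shrink the hot
   team now and return the surplus workers to the thread pool. */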
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this call reduces the hot team size (in the absence of a num_threads
  // clause), do it now rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we no longer need.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // Threads leaving the team must unreference the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() is called.
    hot_team->t.t_size_changed = -1;
  }
}
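/* Change the max-active-levels-var ICV of the calling thread; negative values
   are ignored with a warning, values above KMP_MAX_ACTIVE_LEVELS_LIMIT are
   clamped. */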
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Validate max_active_levels.
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // Ignore the call and keep the last valid setting; a warning is issued if
    // warnings are allowed (KMP_WARNINGS).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // The value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT].
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    // Clamp inputs that exceed the upper limit (implementation defined).
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
// nteams-var per-device ICV
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }
// teams-thread-limit-var per-device ICV
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
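/* Change the run-sched-var ICV (schedule kind and chunk) of the calling
   thread, mapping the user-visible kmp_sched_t kind onto the internal
   sched_type enumeration via __kmp_sch_map. */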
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check that the kind parameter fits one of the two valid intervals
  // (standard or extended); correct it if needed.
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore the chunk value for a bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // Differentiate static chunked vs. unchunked: an invalid chunk
      // indicates the unchunked (default) schedule.
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // Ignore the chunk parameter for schedule(auto).
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
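/* Report the current run-sched-var ICV in terms of the user-visible
   kmp_sched_t kinds, folding the internal static/guided variants. */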
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set; signal that with a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
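/* omp_get_ancestor_thread_num: walk up the team hierarchy, skipping
   serialized levels, and return the thread number of this thread's ancestor
   at the requested nesting level. */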
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Validate level.
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // In a teams region multiple nested teams share the same level; skip
    // past the teams league by artificially increasing ii.
    int tlevel = thr->th.th_teams_level;
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1;
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Validate level.
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // Nested teams share a level; skip past the teams league.
    int tlevel = thr->th.th_teams_level;
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1;
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
kmp_r_sched_t __kmp_get_schedule_global() {
  kmp_r_sched_t r_sched;

  // Build the schedule from the globals __kmp_sched, __kmp_chunk,
  // __kmp_static and __kmp_guided, which may be changed independently.
  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    // Replace STATIC with the more detailed schedule (balanced or greedy).
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    // Replace GUIDED with the more detailed schedule (iterative/analytical).
    r_sched.r_sched_type = __kmp_guided;
  } else { // STATIC_CHUNKED, DYNAMIC_CHUNKED, or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be invalid if it was never set.
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
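/* Make sure team->t.t_argv can hold argc entries: use the inline buffer for
   small argument counts, otherwise (re)allocate a page-aligned array. */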
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* If heap space was previously allocated for the args, free it. */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* Use the unused space in the cache line for the arguments. */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* Allocate space for the arguments on the heap. */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // current state of the scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // int serial_nesting_level (value of th_team_serialized)
      (kmp_int8)__kmp_global.g.g_dynamic, // dynamic adjustment of threads
      (kmp_int8)__kmp_env_blocktime, // bt_set: blocktime explicitly set?
      __kmp_dflt_blocktime, // blocktime
#if KMP_USE_MONITOR
      __kmp_bt_intervals, // blocktime intervals
#endif
      __kmp_dflt_team_nth, // nproc: threads for the next parallel region
      __kmp_cg_max_nth, // thread_limit
      __kmp_dflt_max_active_levels, // max_active_levels
      r_sched, // runtime schedule {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0], // proc_bind
      __kmp_default_device, // default_device
      NULL // struct kmp_internal_control *next
  };

  return g_icvs;
}

static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level = 0;
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
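/* Initialize a freshly allocated kmp_root_t: reset its flags, then create the
   root team (a serialized team of one) and the hot team that will execute
   parallel regions for this root. */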
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // current state of the scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
#if KMP_AFFINITY_SUPPORTED
  root->r.r_affinity_assigned = FALSE;
#endif

  /* setup the root team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
#if USE_DEBUGGER
  // A non-NULL value makes the debugger display the root team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
#endif

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}
3282 typedef struct kmp_team_list_item {
3283 kmp_team_p
const *entry;
3284 struct kmp_team_list_item *next;
3285 } kmp_team_list_item_t;
3286 typedef kmp_team_list_item_t *kmp_team_list_t;
3288 static void __kmp_print_structure_team_accum(
3289 kmp_team_list_t list,
3290 kmp_team_p
const *team
3300 KMP_DEBUG_ASSERT(list != NULL);
3305 __kmp_print_structure_team_accum(list, team->t.t_parent);
3306 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3310 while (l->next != NULL && l->entry != team) {
3313 if (l->next != NULL) {
3319 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3325 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3326 sizeof(kmp_team_list_item_t));
3333 static void __kmp_print_structure_team(
char const *title, kmp_team_p
const *team
3336 __kmp_printf(
"%s", title);
3338 __kmp_printf(
"%2x %p\n", team->t.t_id, team);
3340 __kmp_printf(
" - (nil)\n");
3344 static void __kmp_print_structure_thread(
char const *title,
3345 kmp_info_p
const *thread) {
3346 __kmp_printf(
"%s", title);
3347 if (thread != NULL) {
3348 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3350 __kmp_printf(
" - (nil)\n");
3354 void __kmp_print_structure(
void) {
3356 kmp_team_list_t list;
3360 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3364 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3365 "Table\n------------------------------\n");
3368 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3369 __kmp_printf(
"%2d", gtid);
3370 if (__kmp_threads != NULL) {
3371 __kmp_printf(
" %p", __kmp_threads[gtid]);
3373 if (__kmp_root != NULL) {
3374 __kmp_printf(
" %p", __kmp_root[gtid]);
3381 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3383 if (__kmp_threads != NULL) {
3385 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3386 kmp_info_t
const *thread = __kmp_threads[gtid];
3387 if (thread != NULL) {
3388 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3389 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3390 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3391 __kmp_print_structure_team(
" Serial Team: ",
3392 thread->th.th_serial_team);
3393 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3394 __kmp_print_structure_thread(
" Primary: ",
3395 thread->th.th_team_master);
3396 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3397 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3398 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3399 __kmp_print_structure_thread(
" Next in pool: ",
3400 thread->th.th_next_pool);
3402 __kmp_print_structure_team_accum(list, thread->th.th_team);
3403 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3407 __kmp_printf(
"Threads array is not allocated.\n");
3411 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3413 if (__kmp_root != NULL) {
3415 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3416 kmp_root_t
const *root = __kmp_root[gtid];
3418 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3419 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3420 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3421 __kmp_print_structure_thread(
" Uber Thread: ",
3422 root->r.r_uber_thread);
3423 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3424 __kmp_printf(
" In Parallel: %2d\n",
3425 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3427 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3428 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3432 __kmp_printf(
"Ubers array is not allocated.\n");
3435 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3437 while (list->next != NULL) {
3438 kmp_team_p
const *team = list->entry;
3440 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3441 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3442 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3443 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3444 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3445 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3446 for (i = 0; i < team->t.t_nproc; ++i) {
3447 __kmp_printf(
" Thread %2d: ", i);
3448 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3450 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3456 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3458 __kmp_print_structure_thread(
"Thread pool: ",
3459 CCAST(kmp_info_t *, __kmp_thread_pool));
3460 __kmp_print_structure_team(
"Team pool: ",
3461 CCAST(kmp_team_t *, __kmp_team_pool));
3465 while (list != NULL) {
3466 kmp_team_list_item_t *item = list;
3468 KMP_INTERNAL_FREE(item);
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
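/* Simple per-thread linear congruential generator: each thread gets its own
   multiplier th_a picked from __kmp_primes, the state advances as
   x = a * x + 1 (mod 2^32), and the high 16 bits of the state are returned. */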
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead; returns the
   number of entries reclaimed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // only reclaim roots that died inactive
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
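/* Grow the __kmp_threads and __kmp_root arrays so that at least nNeed more
   entries are available, roughly doubling the capacity each step but never
   exceeding __kmp_sys_max_nth. Returns the number of entries added. */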
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

  // All calls to __kmp_expand_threads are made under __kmp_forkjoin_lock, so
  // resizing __kmp_threads needs no extra protection.

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* Only for the Windows static library: reclaim array entries for root
     threads that are already dead. */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* Compute expansion headroom to check whether we can expand. */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Keep the old __kmp_threads array around for deallocation after the swap.
  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to the new value
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
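/* Register the calling thread as a new root: find a free gtid slot (possibly
   expanding the threads array), allocate and initialize its kmp_root_t, uber
   thread and serial team, and wire up TLS, barrier and affinity state. */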
3640 int __kmp_register_root(
int initial_thread) {
3641 kmp_info_t *root_thread;
3645 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3646 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3663 capacity = __kmp_threads_capacity;
3664 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3671 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3672 capacity -= __kmp_hidden_helper_threads_num;
3676 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3677 if (__kmp_tp_cached) {
3678 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3679 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3680 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3682 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3692 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3695 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3696 gtid <= __kmp_hidden_helper_threads_num;
3699 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3700 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3701 "hidden helper thread: T#%d\n",
3707 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3710 for (gtid = __kmp_hidden_helper_threads_num + 1;
3711 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3715 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3716 KMP_ASSERT(gtid < __kmp_threads_capacity);
3721 TCW_4(__kmp_nth, __kmp_nth + 1);
3725 if (__kmp_adjust_gtid_mode) {
3726 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3727 if (TCR_4(__kmp_gtid_mode) != 2) {
3728 TCW_4(__kmp_gtid_mode, 2);
3731 if (TCR_4(__kmp_gtid_mode) != 1) {
3732 TCW_4(__kmp_gtid_mode, 1);
3737 #ifdef KMP_ADJUST_BLOCKTIME
3740 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3741 if (__kmp_nth > __kmp_avail_proc) {
3742 __kmp_zero_bt = TRUE;
3748 if (!(root = __kmp_root[gtid])) {
3749 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3750 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3753 #if KMP_STATS_ENABLED
3755 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3756 __kmp_stats_thread_ptr->startLife();
3757 KMP_SET_THREAD_STATE(SERIAL_REGION);
3760 __kmp_initialize_root(root);
3763 if (root->r.r_uber_thread) {
3764 root_thread = root->r.r_uber_thread;
3766 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3767 if (__kmp_storage_map) {
3768 __kmp_print_thread_storage_map(root_thread, gtid);
3770 root_thread->th.th_info.ds.ds_gtid = gtid;
3772 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3774 root_thread->th.th_root = root;
3775 if (__kmp_env_consistency_check) {
3776 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3779 __kmp_initialize_fast_memory(root_thread);
3783 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3784 __kmp_initialize_bget(root_thread);
3786 __kmp_init_random(root_thread);
3790 if (!root_thread->th.th_serial_team) {
3791 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3792 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3793 root_thread->th.th_serial_team = __kmp_allocate_team(
3798 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3800 KMP_ASSERT(root_thread->th.th_serial_team);
3801 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3802 root_thread->th.th_serial_team));
3805 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3807 root->r.r_root_team->t.t_threads[0] = root_thread;
3808 root->r.r_hot_team->t.t_threads[0] = root_thread;
3809 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3811 root_thread->th.th_serial_team->t.t_serialized = 0;
3812 root->r.r_uber_thread = root_thread;
3815 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3816 TCW_4(__kmp_init_gtid, TRUE);
3819 __kmp_gtid_set_specific(gtid);
3822 __kmp_itt_thread_name(gtid);
3825 #ifdef KMP_TDATA_GTID
3828 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3829 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3831 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3833 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3834 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3835 KMP_INIT_BARRIER_STATE));
3838 for (b = 0; b < bs_last_barrier; ++b) {
3839 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3841 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3845 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3846 KMP_INIT_BARRIER_STATE);
3848 #if KMP_AFFINITY_SUPPORTED
3849 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3850 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3851 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3852 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3854 root_thread->th.th_def_allocator = __kmp_def_allocator;
3855 root_thread->th.th_prev_level = 0;
3856 root_thread->th.th_prev_num_threads = 1;
3858 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3859 tmp->cg_root = root_thread;
3860 tmp->cg_thread_limit = __kmp_cg_max_nth;
3861 tmp->cg_nthreads = 1;
3862 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3863 " cg_nthreads init to 1\n",
3866 root_thread->th.th_cg_roots = tmp;
3868 __kmp_root_counter++;
3871 if (!initial_thread && ompt_enabled.enabled) {
3873 kmp_info_t *root_thread = ompt_get_thread();
3875 ompt_set_thread_state(root_thread, ompt_state_overhead);
3877 if (ompt_enabled.ompt_callback_thread_begin) {
3878 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3879 ompt_thread_initial, __ompt_get_thread_data_internal());
3881 ompt_data_t *task_data;
3882 ompt_data_t *parallel_data;
3883 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
3885 if (ompt_enabled.ompt_callback_implicit_task) {
3886 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3887 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3890 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3894 if (ompd_state & OMPD_ENABLE_BP)
3895 ompd_bp_thread_begin();
3899 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3904 #if KMP_NESTED_HOT_TEAMS
3905 static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
3906 const int max_level) {
3908 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3909 if (!hot_teams || !hot_teams[level].hot_team) {
3912 KMP_DEBUG_ASSERT(level < max_level);
3913 kmp_team_t *team = hot_teams[level].hot_team;
3914 nth = hot_teams[level].hot_team_nth;
3916 if (level < max_level - 1) {
3917 for (i = 0; i < nth; ++i) {
3918 kmp_info_t *th = team->t.t_threads[i];
3919 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3920 if (i > 0 && th->th.th_hot_teams) {
3921 __kmp_free(th->th.th_hot_teams);
3922 th->th.th_hot_teams = NULL;
3926 __kmp_free_team(root, team, NULL);
3933 static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
3934 kmp_team_t *root_team = root->r.r_root_team;
3935 kmp_team_t *hot_team = root->r.r_hot_team;
3936 int n = hot_team->t.t_nproc;
3939 KMP_DEBUG_ASSERT(!root->r.r_active);
3941 root->r.r_root_team = NULL;
3942 root->r.r_hot_team = NULL;
3945 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
3946 #if KMP_NESTED_HOT_TEAMS
3947 if (__kmp_hot_teams_max_level >
3949 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3950 kmp_info_t *th = hot_team->t.t_threads[i];
3951 if (__kmp_hot_teams_max_level > 1) {
3952 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3954 if (th->th.th_hot_teams) {
3955 __kmp_free(th->th.th_hot_teams);
3956 th->th.th_hot_teams = NULL;
3961 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
3966 if (__kmp_tasking_mode != tskm_immediate_exec) {
3967 __kmp_wait_to_unref_task_teams();
3973 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3975 (LPVOID) & (root->r.r_uber_thread->th),
3976 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3977 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3981 if (ompd_state & OMPD_ENABLE_BP)
3982 ompd_bp_thread_end();
3986 ompt_data_t *task_data;
3987 ompt_data_t *parallel_data;
3988 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
3990 if (ompt_enabled.ompt_callback_implicit_task) {
3991 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3992 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
3994 if (ompt_enabled.ompt_callback_thread_end) {
3995 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3996 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4002 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4003 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4005 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4006 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4009 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4010 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4011 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4012 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4013 root->r.r_uber_thread->th.th_cg_roots = NULL;
4015 __kmp_reap_thread(root->r.r_uber_thread, 1);
4019 root->r.r_uber_thread = NULL;
4021 root->r.r_begin = FALSE;
4026 void __kmp_unregister_root_current_thread(
int gtid) {
4027 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4031 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4032 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4033 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4036 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4039 kmp_root_t *root = __kmp_root[gtid];
4041 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4042 KMP_ASSERT(KMP_UBER_GTID(gtid));
4043 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4044 KMP_ASSERT(root->r.r_active == FALSE);
4048 kmp_info_t *thread = __kmp_threads[gtid];
4049 kmp_team_t *team = thread->th.th_team;
4050 kmp_task_team_t *task_team = thread->th.th_task_team;
4053 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
4056 thread->th.ompt_thread_info.state = ompt_state_undefined;
4058 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4061 __kmp_reset_root(gtid, root);
4065 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4067 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4074 static int __kmp_unregister_root_other_thread(
int gtid) {
4075 kmp_root_t *root = __kmp_root[gtid];
4078 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4079 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4080 KMP_ASSERT(KMP_UBER_GTID(gtid));
4081 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4082 KMP_ASSERT(root->r.r_active == FALSE);
4084 r = __kmp_reset_root(gtid, root);
4086 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
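/* (Re)initialize a worker's kmp_info_t for its position (tid) in the given
   team: team pointers, implicit task, dispatch buffers, and the contention
   group bookkeeping inherited from the team's primary thread. */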
4111 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4112 int tid,
int gtid) {
4116 KMP_DEBUG_ASSERT(this_thr != NULL);
4117 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4118 KMP_DEBUG_ASSERT(team);
4119 KMP_DEBUG_ASSERT(team->t.t_threads);
4120 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4121 kmp_info_t *master = team->t.t_threads[0];
4122 KMP_DEBUG_ASSERT(master);
4123 KMP_DEBUG_ASSERT(master->th.th_root);
4127 TCW_SYNC_PTR(this_thr->th.th_team, team);
4129 this_thr->th.th_info.ds.ds_tid = tid;
4130 this_thr->th.th_set_nproc = 0;
4131 if (__kmp_tasking_mode != tskm_immediate_exec)
4134 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4136 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4137 this_thr->th.th_set_proc_bind = proc_bind_default;
4138 #if KMP_AFFINITY_SUPPORTED
4139 this_thr->th.th_new_place = this_thr->th.th_current_place;
4141 this_thr->th.th_root = master->th.th_root;
4144 this_thr->th.th_team_nproc = team->t.t_nproc;
4145 this_thr->th.th_team_master = master;
4146 this_thr->th.th_team_serialized = team->t.t_serialized;
4148 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4150 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4151 tid, gtid, this_thr, this_thr->th.th_current_task));
4153 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4156 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4157 tid, gtid, this_thr, this_thr->th.th_current_task));
4162 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4164 this_thr->th.th_local.this_construct = 0;
4166 if (!this_thr->th.th_pri_common) {
4167 this_thr->th.th_pri_common =
4168 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4169 if (__kmp_storage_map) {
4170 __kmp_print_storage_map_gtid(
4171 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4172 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4174 this_thr->th.th_pri_head = NULL;
4177 if (this_thr != master &&
4178 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4180 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4181 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4184 int i = tmp->cg_nthreads--;
4185 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4186 " on node %p of thread %p to %d\n",
4187 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4192 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4194 this_thr->th.th_cg_roots->cg_nthreads++;
4195 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4196 " node %p of thread %p to %d\n",
4197 this_thr, this_thr->th.th_cg_roots,
4198 this_thr->th.th_cg_roots->cg_root,
4199 this_thr->th.th_cg_roots->cg_nthreads));
4200 this_thr->th.th_current_task->td_icvs.thread_limit =
4201 this_thr->th.th_cg_roots->cg_thread_limit;
4206 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4209 sizeof(dispatch_private_info_t) *
4210 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4211 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4212 team->t.t_max_nproc));
4213 KMP_ASSERT(dispatch);
4214 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4215 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4217 dispatch->th_disp_index = 0;
4218 dispatch->th_doacross_buf_idx = 0;
4219 if (!dispatch->th_disp_buffer) {
4220 dispatch->th_disp_buffer =
4221 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4223 if (__kmp_storage_map) {
4224 __kmp_print_storage_map_gtid(
4225 gtid, &dispatch->th_disp_buffer[0],
4226 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4228 : __kmp_dispatch_num_buffers],
4230 "th_%d.th_dispatch.th_disp_buffer "
4231 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4232 gtid, team->t.t_id, gtid);
4235 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4238 dispatch->th_dispatch_pr_current = 0;
4239 dispatch->th_dispatch_sh_current = 0;
4241 dispatch->th_deo_fcn = 0;
4242 dispatch->th_dxo_fcn = 0;
4245 this_thr->th.th_next_pool = NULL;
4247 if (!this_thr->th.th_task_state_memo_stack) {
4249 this_thr->th.th_task_state_memo_stack =
4250 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4251 this_thr->th.th_task_state_top = 0;
4252 this_thr->th.th_task_state_stack_sz = 4;
4253 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4255 this_thr->th.th_task_state_memo_stack[i] = 0;
4258 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4259 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
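/* Obtain a worker for the team: reuse a thread from the thread pool if one is
   available, otherwise allocate a fresh kmp_info_t, give it a serial team,
   and start the underlying OS worker via __kmp_create_worker. */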
4269 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4271 kmp_team_t *serial_team;
4272 kmp_info_t *new_thr;
4275 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4276 KMP_DEBUG_ASSERT(root && team);
4277 #if !KMP_NESTED_HOT_TEAMS
4278 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4283 if (__kmp_thread_pool) {
4284 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4285 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4286 if (new_thr == __kmp_thread_pool_insert_pt) {
4287 __kmp_thread_pool_insert_pt = NULL;
4289 TCW_4(new_thr->th.th_in_pool, FALSE);
4290 __kmp_suspend_initialize_thread(new_thr);
4291 __kmp_lock_suspend_mx(new_thr);
4292 if (new_thr->th.th_active_in_pool == TRUE) {
4293 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4294 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4295 new_thr->th.th_active_in_pool = FALSE;
4297 __kmp_unlock_suspend_mx(new_thr);
4299 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4300 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4301 KMP_ASSERT(!new_thr->th.th_team);
4302 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4305 __kmp_initialize_info(new_thr, team, new_tid,
4306 new_thr->th.th_info.ds.ds_gtid);
4307 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4309 TCW_4(__kmp_nth, __kmp_nth + 1);
4311 new_thr->th.th_task_state = 0;
4312 new_thr->th.th_task_state_top = 0;
4313 new_thr->th.th_task_state_stack_sz = 4;
4315 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4317 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4321 #ifdef KMP_ADJUST_BLOCKTIME
4324 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4325 if (__kmp_nth > __kmp_avail_proc) {
4326 __kmp_zero_bt = TRUE;
4335 kmp_balign_t *balign = new_thr->th.th_bar;
4336 for (b = 0; b < bs_last_barrier; ++b)
4337 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4340 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4341 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4348 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4349 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4354 if (!TCR_4(__kmp_init_monitor)) {
4355 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4356 if (!TCR_4(__kmp_init_monitor)) {
4357 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4358 TCW_4(__kmp_init_monitor, 1);
4359 __kmp_create_monitor(&__kmp_monitor);
4360 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4371 while (TCR_4(__kmp_init_monitor) < 2) {
4374 KF_TRACE(10, (
"after monitor thread has started\n"));
4377 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4384 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4386 : __kmp_hidden_helper_threads_num + 1;
4388 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4390 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4393 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4394 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4399 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4401 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4403 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4406 __itt_suppress_mark_range(
4407 __itt_suppress_range, __itt_suppress_threading_errors,
4408 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4409 __itt_suppress_mark_range(
4410 __itt_suppress_range, __itt_suppress_threading_errors,
4411 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4413 __itt_suppress_mark_range(
4414 __itt_suppress_range, __itt_suppress_threading_errors,
4415 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4417 __itt_suppress_mark_range(__itt_suppress_range,
4418 __itt_suppress_threading_errors,
4419 &new_thr->th.th_suspend_init_count,
4420 sizeof(new_thr->th.th_suspend_init_count));
4423 __itt_suppress_mark_range(__itt_suppress_range,
4424 __itt_suppress_threading_errors,
4425 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4426 sizeof(new_thr->th.th_bar[0].bb.b_go));
4427 __itt_suppress_mark_range(__itt_suppress_range,
4428 __itt_suppress_threading_errors,
4429 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4430 sizeof(new_thr->th.th_bar[1].bb.b_go));
4431 __itt_suppress_mark_range(__itt_suppress_range,
4432 __itt_suppress_threading_errors,
4433 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4434 sizeof(new_thr->th.th_bar[2].bb.b_go));
4436 if (__kmp_storage_map) {
4437 __kmp_print_thread_storage_map(new_thr, new_gtid);
4442 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4443 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4444 new_thr->th.th_serial_team = serial_team =
4445 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4449 proc_bind_default, &r_icvs,
4450 0 USE_NESTED_HOT_ARG(NULL));
4452 KMP_ASSERT(serial_team);
4453 serial_team->t.t_serialized = 0;
4455 serial_team->t.t_threads[0] = new_thr;
4457 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4461 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4464 __kmp_initialize_fast_memory(new_thr);
4468 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4469 __kmp_initialize_bget(new_thr);
4472 __kmp_init_random(new_thr);
4476 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4477 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4480 kmp_balign_t *balign = new_thr->th.th_bar;
4481 for (b = 0; b < bs_last_barrier; ++b) {
4482 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4483 balign[b].bb.team = NULL;
4484 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4485 balign[b].bb.use_oncore_barrier = 0;
4488 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4489 new_thr->th.th_sleep_loc_type = flag_unset;
4491 new_thr->th.th_spin_here = FALSE;
4492 new_thr->th.th_next_waiting = 0;
4494 new_thr->th.th_blocking =
false;
4497 #if KMP_AFFINITY_SUPPORTED
4498 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4499 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4500 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4501 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4503 new_thr->th.th_def_allocator = __kmp_def_allocator;
4504 new_thr->th.th_prev_level = 0;
4505 new_thr->th.th_prev_num_threads = 1;
4507 TCW_4(new_thr->th.th_in_pool, FALSE);
4508 new_thr->th.th_active_in_pool = FALSE;
4509 TCW_4(new_thr->th.th_active, TRUE);
4517 if (__kmp_adjust_gtid_mode) {
4518 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4519 if (TCR_4(__kmp_gtid_mode) != 2) {
4520 TCW_4(__kmp_gtid_mode, 2);
4523 if (TCR_4(__kmp_gtid_mode) != 1) {
4524 TCW_4(__kmp_gtid_mode, 1);
4529 #ifdef KMP_ADJUST_BLOCKTIME
4532 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4533 if (__kmp_nth > __kmp_avail_proc) {
4534 __kmp_zero_bt = TRUE;
4541 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4542 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4544 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4546 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
// Reinitialize a team for reuse (e.g. the hot team), refreshing its ICVs.
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy the ICVs to the primary thread's implicit taskdata.
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
/* Initialize the team data structure.
   This assumes t_threads and t_max_nproc are already set.
   The team's arguments are not touched here. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent is left alone; clearing it would break the hot team. */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
4631 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
4634 __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4635 if (KMP_AFFINITY_CAPABLE()) {
4637 if (old_mask != NULL) {
4638 status = __kmp_get_system_affinity(old_mask, TRUE);
4641 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4645 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4650 #if KMP_AFFINITY_SUPPORTED
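// Distribute the places of the primary thread's partition among the team's
// workers according to the team's proc-bind policy (primary, close, spread).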
4656 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4658 if (KMP_HIDDEN_HELPER_TEAM(team))
4661 kmp_info_t *master_th = team->t.t_threads[0];
4662 KMP_DEBUG_ASSERT(master_th != NULL);
4663 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4664 int first_place = master_th->th.th_first_place;
4665 int last_place = master_th->th.th_last_place;
4666 int masters_place = master_th->th.th_current_place;
4667 team->t.t_first_place = first_place;
4668 team->t.t_last_place = last_place;
4670 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4671 "bound to place %d partition = [%d,%d]\n",
4672 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4673 team->t.t_id, masters_place, first_place, last_place));
4675 switch (proc_bind) {
4677 case proc_bind_default:
4680 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4683 case proc_bind_primary: {
4685 int n_th = team->t.t_nproc;
4686 for (f = 1; f < n_th; f++) {
4687 kmp_info_t *th = team->t.t_threads[f];
4688 KMP_DEBUG_ASSERT(th != NULL);
4689 th->th.th_first_place = first_place;
4690 th->th.th_last_place = last_place;
4691 th->th.th_new_place = masters_place;
4692 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4693 team->t.t_display_affinity != 1) {
4694 team->t.t_display_affinity = 1;
4697 KA_TRACE(100, (
"__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4698 "partition = [%d,%d]\n",
4699 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4700 f, masters_place, first_place, last_place));
  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        th->th.th_first_place = first_place;
        th->th.th_last_place = last_place;
        th->th.th_new_place = place;
        if (__kmp_display_affinity && place != th->th.th_current_place &&
            team->t.t_display_affinity != 1) {
          team->t.t_display_affinity = 1;
        }

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else { // more threads than places: pack S or S+1 threads per place
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      for (f = 0; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        th->th.th_first_place = first_place;
        th->th.th_last_place = last_place;
        th->th.th_new_place = place;
        if (__kmp_display_affinity && place != th->th.th_current_place &&
            team->t.t_display_affinity != 1) {
          team->t.t_display_affinity = 1;
        }
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing: add an extra thread to this place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add an extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
      }
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;
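  /* Illustrative note (not part of the original source): in the
     oversubscribed close case above, S = n_th / n_places threads are packed
     into each place, and the rem = n_th % n_places leftover threads are
     dropped into every "gap"-th place.  E.g. n_th = 10 over n_places = 4
     gives S = 2, rem = 2, gap = 2, so the four places receive 3, 2, 3, 2
     threads, and the final value of "place" wraps back around to
     masters_place, which the assertion checks. */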
  case proc_bind_spread: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    int thidx;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = -1;

      if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
        int S = n_places / n_th;
        int s_count, rem, gap, gap_ct;

        place = masters_place;
        rem = n_places - n_th * S;
        gap = rem ? n_th / rem : 1;
        gap_ct = gap;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          kmp_info_t *th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th != NULL);

          th->th.th_first_place = place;
          th->th.th_new_place = place;
          if (__kmp_display_affinity && place != th->th.th_current_place &&
              team->t.t_display_affinity != 1) {
            team->t.t_display_affinity = 1;
          }
          s_count = 1;
          while (s_count < S) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            s_count++;
          }
          if (rem && (gap_ct == gap)) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            rem--;
            gap_ct = 0;
          }
          th->th.th_last_place = place;
          gap_ct++;

          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }

          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, __kmp_affinity_num_masks));
        }
      } else { // partition spans all masks: use fractional spacing instead
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;

        thidx = n_th + 1;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
          if (first >= n_places) {
            if (masters_place) {
              first -= n_places;
              last -= n_places;
              if (first == (masters_place + 1)) {
                KMP_DEBUG_ASSERT(f == n_th);
                first--;
              }
              if (last == masters_place) {
                KMP_DEBUG_ASSERT(f == (n_th - 1));
                last--;
              }
            } else {
              KMP_DEBUG_ASSERT(f == n_th);
              first = 0;
              last = 0;
            }
          }
          if (last >= n_places) {
            last = (n_places - 1);
          }
          place = first;
          current += spacing;
          if (f < n_th) {
            KMP_DEBUG_ASSERT(0 <= first);
            KMP_DEBUG_ASSERT(n_places > first);
            KMP_DEBUG_ASSERT(0 <= last);
            KMP_DEBUG_ASSERT(n_places > last);
            KMP_DEBUG_ASSERT(last_place >= first_place);
            th = team->t.t_threads[f];
            KMP_DEBUG_ASSERT(th);
            th->th.th_first_place = first;
            th->th.th_new_place = place;
            th->th.th_last_place = last;
            if (__kmp_display_affinity && place != th->th.th_current_place &&
                team->t.t_display_affinity != 1) {
              team->t.t_display_affinity = 1;
            }
            KA_TRACE(100,
                     ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                      "partition = [%d,%d], spacing = %.4f\n",
                      __kmp_gtid_from_thread(team->t.t_threads[f]),
                      team->t.t_id, f, th->th.th_new_place,
                      th->th.th_first_place, th->th.th_last_place, spacing));
          }
        }
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
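      /* Illustrative note (not part of the original source): when the
         partition spans every place, spread uses fractional spacing instead
         of the integer S/rem/gap bookkeeping.  With n_places = 8, n_th = 3
         and the primary on place 0, spacing = (8 + 1) / 3 = 3.0, so the
         sub-partitions come out as [0,2], [3,5] and [6,7]: each worker gets a
         disjoint, roughly equal slice of the places. */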
    } else { // n_th > n_places: pack S or S+1 threads per place
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        th->th.th_first_place = place;
        th->th.th_last_place = place;
        th->th.th_new_place = place;
        if (__kmp_display_affinity && place != th->th.th_current_place &&
            team->t.t_display_affinity != 1) {
          team->t.t_display_affinity = 1;
        }
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing: add an extra thread to this place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add an extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    }
  } break;

  default:
    break;
  }

  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
#endif // KMP_AFFINITY_SUPPORTED
/* Allocate a new team, from the team pool if possible.  Use the root's hot
   team when we can. */
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
  KMP_MB();
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams;
  if (master) {
    team = master->th.th_team;
    level = team->t.t_active_level;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1 &&
          (team->t.t_pkfn == (microtask_t)__kmp_teams_master ||
           master->th.th_teams_level < team->t.t_level)) {
        ++level; // nested fork inside the teams construct uses the next level
      }
    }
    hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level && hot_teams &&
        hot_teams[level].hot_team) {
      use_hot_team = 1; // a hot team is already allocated for this level
    } else {
      use_hot_team = 0;
    }
  } else {
    // Don't access uninitialized hot_teams: only the serial team is requested.
    KMP_DEBUG_ASSERT(new_nproc == 1);
  }
#endif
  // Optimization: reuse the root's "hot" team for the top-level parallel region
  if (use_hot_team && new_nproc > 1) {
    KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
#if KMP_NESTED_HOT_TEAMS
    team = hot_teams[level].hot_team;
#else
    team = root->r.r_hot_team;
#endif
#if KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
#endif
    if (team->t.t_nproc != new_nproc &&
        __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // The distributed barrier may need to be resized to the new thread count
      int old_nthr = team->t.t_nproc;
      __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
    }
    // Has the number of threads changed?
    if (team->t.t_nproc == new_nproc) { // no change in the number of threads
      KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
      // This case can mean that omp_set_num_threads() was called and the hot
      // team size was already reduced, so we check the special flag.
      if (team->t.t_size_changed == -1) {
        team->t.t_size_changed = 1;
      } else {
        KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
      }

      kmp_r_sched_t new_sched = new_icvs->sched;
      // set the primary thread's schedule as the new run-time schedule
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#if KMP_AFFINITY_SUPPORTED
      if ((team->t.t_size_changed == 0) &&
          (team->t.t_proc_bind == new_proc_bind)) {
        if (new_proc_bind == proc_bind_spread) {
          // update only the master's place partition for spread
          __kmp_partition_places(team, 1);
        }
        KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                       "proc_bind = %d, partition = [%d,%d]\n",
                       team->t.t_id, new_proc_bind, team->t.t_first_place,
                       team->t.t_last_place));
      } else {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
        __kmp_partition_places(team);
      }
#else
      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#endif /* KMP_AFFINITY_SUPPORTED */
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size was already reduced earlier in this function;
        // activate team threads via th_used_in_team.
        __kmp_add_threads_to_team(team, new_nproc);
      }
#if KMP_NESTED_HOT_TEAMS
      if (__kmp_hot_teams_mode == 0) {
        // The saved thread count should match the team's value in this mode;
        // it can be bigger in mode 1, when the hot team keeps threads in
        // reserve.
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
        hot_teams[level].hot_team_nth = new_nproc;
#endif // KMP_NESTED_HOT_TEAMS
        /* release the extra threads we don't need any more */
        for (f = new_nproc; f < team->t.t_nproc; f++) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          if (__kmp_tasking_mode != tskm_immediate_exec) {
            // Threads no longer in the team should unref the task team.
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          __kmp_free_thread(team->t.t_threads[f]);
          team->t.t_threads[f] = NULL;
        }
#if KMP_NESTED_HOT_TEAMS
      } else {
        // When keeping extra threads in the team, switch them to wait on
        // their own b_go flag.
        for (f = new_nproc; f < team->t.t_nproc; ++f) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
          for (int b = 0; b < bs_last_barrier; ++b) {
            if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
              balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
            }
            KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
          }
        }
      }
#endif // KMP_NESTED_HOT_TEAMS
      team->t.t_nproc = new_nproc;
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      // Update the remaining threads.
      for (f = 0; f < new_nproc; ++f) {
        team->t.t_threads[f]->th.th_team_nproc = new_nproc;
      }

      // restore the current task state of the primary thread
      KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));

      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#ifdef KMP_DEBUG
      for (f = 0; f < team->t.t_nproc; f++) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }
#endif

      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
      __kmp_partition_places(team);
#endif
    } else { // team->t.t_nproc < new_nproc
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
      kmp_affin_mask_t *old_mask;
      if (KMP_AFFINITY_CAPABLE()) {
        KMP_CPU_ALLOC(old_mask);
      }
#endif

      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc; // save old value for later updates
      team->t.t_size_changed = 1;

#if KMP_NESTED_HOT_TEAMS
      int avail_threads = hot_teams[level].hot_team_nth;
      if (new_nproc < avail_threads)
        avail_threads = new_nproc;
      kmp_info_t **other_threads = team->t.t_threads;
      for (f = team->t.t_nproc; f < avail_threads; ++f) {
        // Adjust barrier data of reserved threads (if any) of the team.
        // Other data will be set in __kmp_initialize_info() below.
        int b;
        kmp_balign_t *balign = other_threads[f]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
      if (hot_teams[level].hot_team_nth >= new_nproc) {
        // we have all needed threads in reserve; no need to allocate any
        // (only possible in mode 1: no reserved threads in mode 0)
        KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
        team->t.t_nproc = new_nproc; // just get reserved threads involved
      } else {
        // We may have some threads in reserve, but not enough; get the
        // reserved threads involved if any, and adjust the hot team max size.
        team->t.t_nproc = hot_teams[level].hot_team_nth;
        hot_teams[level].hot_team_nth = new_nproc;
#endif // KMP_NESTED_HOT_TEAMS
        if (team->t.t_max_nproc < new_nproc) {
          /* reallocate larger arrays */
          __kmp_reallocate_team_arrays(team, new_nproc);
          __kmp_reinitialize_team(team, new_icvs, NULL);
        }

#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
        /* Temporarily set the full mask for the primary thread before the
           workers are created, so the workers don't all inherit a single-core
           affinity from the primary thread. */
        __kmp_set_thread_affinity_mask_full_tmp(old_mask);
#endif

        /* allocate new threads for the hot team */
        for (f = team->t.t_nproc; f < new_nproc; f++) {
          kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
          KMP_DEBUG_ASSERT(new_worker);
          team->t.t_threads[f] = new_worker;

          KA_TRACE(20,
                   ("__kmp_allocate_team: team %d init T#%d arrived: "
                    "join=%llu, plain=%llu\n",
                    team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));

          { // Initialize barrier data for the new threads.
            int b;
            kmp_balign_t *balign = new_worker->th.th_bar;
            for (b = 0; b < bs_last_barrier; ++b) {
              balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
              KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
                               KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
              balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
            }
          }
        }

#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
        if (KMP_AFFINITY_CAPABLE()) {
          /* Restore the initial primary thread's affinity mask */
          __kmp_set_system_affinity(old_mask, TRUE);
          KMP_CPU_FREE(old_mask);
        }
#endif
#if KMP_NESTED_HOT_TEAMS
      } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size was already increased earlier in this function;
        // activate team threads via th_used_in_team.
        __kmp_add_threads_to_team(team, new_nproc);
      }
      /* make sure everyone is syncronized with the team size */
      __kmp_initialize_team(team, new_nproc, new_icvs,
                            root->r.r_uber_thread->th.th_ident);

      /* reinitialize the threads */
      KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
      for (f = 0; f < team->t.t_nproc; ++f)
        __kmp_initialize_info(team->t.t_threads[f], team, f,
                              __kmp_gtid_from_tid(f, team));

      if (level) { // set th_task_state for new threads in nested hot team
        for (f = old_nproc; f < team->t.t_nproc; ++f)
          team->t.t_threads[f]->th.th_task_state =
              team->t.t_threads[0]->th.th_task_state_memo_stack[level];
      } else { // set th_task_state for new threads in non-nested hot team
        kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
        for (f = old_nproc; f < team->t.t_nproc; ++f)
          team->t.t_threads[f]->th.th_task_state = old_state;
      }

#ifdef KMP_DEBUG
      for (f = 0; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }
#endif

      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
      __kmp_partition_places(team);
#endif
    } // Check changes in number of threads
    kmp_info_t *master = team->t.t_threads[0];
    if (master->th.th_teams_microtask) {
      for (f = 1; f < new_nproc; ++f) {
        // propagate teams-construct-specific info to the workers
        kmp_info_t *thr = team->t.t_threads[f];
        thr->th.th_teams_microtask = master->th.th_teams_microtask;
        thr->th.th_teams_level = master->th.th_teams_level;
        thr->th.th_teams_size = master->th.th_teams_size;
      }
    }
#if KMP_NESTED_HOT_TEAMS
    if (level) {
      // Sync barrier data for nested hot teams (not needed for the outermost
      // hot team).
      for (f = 1; f < new_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        int b;
        kmp_balign_t *balign = thr->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }
#endif // KMP_NESTED_HOT_TEAMS
    /* reallocate space for arguments if necessary */
    __kmp_alloc_argv_entries(argc, team, TRUE);
    KMP_CHECK_UPDATE(team->t.t_argc, argc);

    KF_TRACE(10, (" hot_team = %p\n", team));

#if KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
#endif

#if OMPT_SUPPORT
    __ompt_team_assign_id(team, ompt_parallel_data);
#endif

    return team;
  }
  /* next, try to take a team from the team pool */
  KMP_MB();
  for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
    /* TODO: consider resizing undersized teams instead of reaping them */
    if (team->t.t_max_nproc >= max_nproc) {
      /* take this team from the team pool */
      __kmp_team_pool = team->t.t_next_pool;

      if (max_nproc > 1 &&
          __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        if (!team->t.b) { // Allocate barrier structure
          team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
        }
      }

      /* set up the team for fresh use */
      __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
      team->t.t_task_team[0] = NULL;
      team->t.t_task_team[1] = NULL;

      /* reallocate space for arguments if necessary */
      __kmp_alloc_argv_entries(argc, team, TRUE);
      KMP_CHECK_UPDATE(team->t.t_argc, argc);

      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
      { // Initialize barrier data.
        int b;
        for (b = 0; b < bs_last_barrier; ++b) {
          team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
          team->t.t_bar[b].b_master_arrived = 0;
          team->t.t_bar[b].b_team_arrived = 0;
#endif
        }
      }

      team->t.t_proc_bind = new_proc_bind;

      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

#if OMPT_SUPPORT
      __ompt_team_assign_id(team, ompt_parallel_data);
#endif

      KMP_MB();
      return team;
    }
    /* reap the team if it is too small, then loop back and check the next one */
    team = __kmp_reap_team(team);
    __kmp_team_pool = team;
  }
  /* nothing available in the pool -- make a new team */
  KMP_MB();
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));

  /* and set it up */
  team->t.t_max_nproc = max_nproc;
  if (max_nproc > 1 &&
      __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
    // Allocate barrier structure
    team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
  }

  __kmp_allocate_team_arrays(team, max_nproc);

  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
  team->t.t_task_team[0] = NULL;
  team->t.t_task_team[1] = NULL;

  if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
  }

  /* allocate space for arguments */
  __kmp_alloc_argv_entries(argc, team, FALSE);
  team->t.t_argc = argc;

  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
#if USE_DEBUGGER
      team->t.t_bar[b].b_master_arrived = 0;
      team->t.t_bar[b].b_team_arrived = 0;
#endif
    }
  }

  team->t.t_proc_bind = new_proc_bind;

#if OMPT_SUPPORT
  __ompt_team_assign_id(team, ompt_parallel_data);
  team->t.ompt_serialized_team_info = NULL;
#endif

  KMP_MB();

  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
/* Free the team.  Return it to the team pool and release all the threads
   associated with it. */
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  int level;
  if (master) {
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased for the team of masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased for the team of workers
      }
    }
    kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }
#endif // KMP_NESTED_HOT_TEAMS

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn, NULL); // important for debugging support library
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; // init counter for possible reuse
#endif
  /* if we are a non-hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for threads to reach a reapable state.
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time; check this.
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for a dead thread
            break;
          }
#endif
          // first check whether the thread is sleeping
          kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
          if (fl.is_sleeping())
            fl.resume(__kmp_gtid_from_thread(th));
          KMP_CPU_PAUSE();
        }
      }

      // Delete the task teams.
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            KMP_DEBUG_ASSERT(team->t.t_threads[f]);
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
          __kmp_free_task_team(master, task_team);
#endif
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }
    // Reset pointer to the parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;

    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
                                    1, 2);
      }
      __kmp_free_thread(team->t.t_threads[f]);
    }

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      if (team->t.b) {
        // wake up threads at their old location
        team->t.b->go_release();
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
          for (f = 1; f < team->t.t_nproc; ++f) {
            if (team->t.b->sleep[f].sleep) {
              __kmp_atomic_resume_64(
                  team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                  (kmp_atomic_flag_64<> *)NULL);
            }
          }
        }
        // Wait for the threads to be removed from the team.
        for (int f = 1; f < team->t.t_nproc; ++f) {
          while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
            KMP_CPU_PAUSE();
        }
      }
    }

    for (f = 1; f < team->t.t_nproc; ++f) {
      team->t.t_threads[f] = NULL;
    }

    if (team->t.t_max_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      distributedBarrier::deallocate(team->t.b);
      team->t.b = NULL;
    }
    /* put the team back in the team pool */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
  } else { // Check if team was created for primary threads in teams construct
    // See whether the first worker is a CG root.
    KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
                     team->t.t_threads[1]->th.th_cg_roots);
    if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
      // Clean up the CG root nodes on the workers so the team can be reused.
      for (f = 1; f < team->t.t_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
                         thr->th.th_cg_roots->cg_root == thr);
        // Pop the current CG root off the list.
        kmp_cg_root_t *tmp = thr->th.th_cg_roots;
        thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1) {
          __kmp_free(tmp); // free CG if we are the last thread in it
        }
        // Restore the current task's thread_limit from the CG root.
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
      }
    }
  }

  KMP_MB();
}
/* Reap the team: destroy it, reclaim all its resources and free its memory. */
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  // Free the team arrays and the team itself.
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  KMP_MB();
  return next_pool;
}
// Free the thread.  Don't reap it, just place it on the pool of available
// threads.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving the thread to the pool, switch it to wait on its own b_go
  // flag and clear its team pointers.
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put the thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL);

  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // this thread is a CG root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves the contention group
        __kmp_free(tmp);
      }
      this_th->th.th_cg_roots = NULL;
      break;
    }
  }

  /* The implicit task assigned to this thread may be shared with other
     threads, so always deallocate it here to avoid a double free at reap
     time. */
  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If __kmp_thread_pool_insert_pt is already past the new insert point, we
  // need to re-scan the entire pool.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread, keeping the
  // pool sorted by gtid.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element into the list and remember the insertion point.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_suspend_initialize_thread(this_th);
  __kmp_lock_suspend_mx(this_th);
  if (this_th->th.th_active == TRUE) {
    KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
    this_th->th.th_active_in_pool = TRUE;
  }
#if KMP_DEBUG
  else {
    KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
  }
#endif
  __kmp_unlock_suspend_mx(this_th);

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to the user setting or default if necessary;
     middle initialization might never have occurred. */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  KMP_MB();
}
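/* Illustrative note (not part of the original source): the free-thread path
   above keeps __kmp_thread_pool sorted by gtid and caches the last insertion
   point in __kmp_thread_pool_insert_pt, so releasing threads in ascending
   gtid order (the common case when a team is torn down) inserts without
   rescanning the whole pool each time. */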
/* ------------------------------------------------------------------------ */

void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KMP_MB();
  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();
#endif

#if OMPT_SUPPORT
  ompt_data_t *thread_data = nullptr;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    this_thr->th.ompt_thread_info.parallel_flags = 0;
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_idle;
  }
#endif

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
    KMP_MB();

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* no tid yet since we are not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    pteam = &this_thr->th.th_team;

    /* have we been allocated to a team? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
        }
#endif

        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KMP_MB();
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        /* no frame set while outside a task */
        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;

        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      /* join barrier after the parallel region */
      __kmp_join_barrier(gtid);
    }
  }
  TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }
#endif

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
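/* Illustrative note (not part of the original source): a worker's life cycle
   is the loop above -- sleep in __kmp_fork_barrier() until a team is
   assigned, run the team's t_invoke() microtask, synchronize in
   __kmp_join_barrier(), and return to the fork barrier -- until the global
   g_done flag tells it the runtime is shutting down. */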
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in the thread-local storage because 0
     is a valid gtid. */
  __kmp_internal_end_thread(gtid);
}
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

/* [Windows] when the atexit handler is called, there may still be more than
   one thread alive */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed that __kmp_forkjoin_lock is acquired.

  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        while (
            !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
          KMP_CPU_PAUSE();
        __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      } else {
        /* Need a release fence here to prevent seg faults for the tree
           fork/join barrier */
        kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           thread);
        __kmp_release_64(&flag);
      }
    }
6075 __kmp_reap_worker(thread);
6087 if (thread->th.th_active_in_pool) {
6088 thread->th.th_active_in_pool = FALSE;
6089 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6090 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6094 __kmp_free_implicit_task(thread);
6098 __kmp_free_fast_memory(thread);
6101 __kmp_suspend_uninitialize_thread(thread);
6103 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6104 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6109 #ifdef KMP_ADJUST_BLOCKTIME
6112 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6113 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6114 if (__kmp_nth <= __kmp_avail_proc) {
6115 __kmp_zero_bt = FALSE;
6121 if (__kmp_env_consistency_check) {
6122 if (thread->th.th_cons) {
6123 __kmp_free_cons_stack(thread->th.th_cons);
6124 thread->th.th_cons = NULL;
6128 if (thread->th.th_pri_common != NULL) {
6129 __kmp_free(thread->th.th_pri_common);
6130 thread->th.th_pri_common = NULL;
6133 if (thread->th.th_task_state_memo_stack != NULL) {
6134 __kmp_free(thread->th.th_task_state_memo_stack);
6135 thread->th.th_task_state_memo_stack = NULL;
6139 if (thread->th.th_local.bget_data != NULL) {
6140 __kmp_finalize_bget(thread);
6144 #if KMP_AFFINITY_SUPPORTED
6145 if (thread->th.th_affin_mask != NULL) {
6146 KMP_CPU_FREE(thread->th.th_affin_mask);
6147 thread->th.th_affin_mask = NULL;
6151 #if KMP_USE_HIER_SCHED
6152 if (thread->th.th_hier_bar_data != NULL) {
6153 __kmp_free(thread->th.th_hier_bar_data);
6154 thread->th.th_hier_bar_data = NULL;
6158 __kmp_reap_team(thread->th.th_serial_team);
6159 thread->th.th_serial_team = NULL;
static void __kmp_internal_end(void) {
  int i;

  /* first, unregister the library */
  __kmp_unregister_library();

  /* only the current root should be reclaimed if other live roots are found */
  __kmp_reclaim_dead_roots();

  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  KMP_MB(); /* Flush all pending memory write invalidates. */
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
#if KMP_USE_MONITOR
    // Need to check that the monitor was initialized before reaping it.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
6212 for (i = 0; i < __kmp_threads_capacity; i++) {
6213 if (__kmp_root[i]) {
6216 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6225 while (__kmp_thread_pool != NULL) {
6227 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6228 __kmp_thread_pool = thread->th.th_next_pool;
6230 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6231 thread->th.th_next_pool = NULL;
6232 thread->th.th_in_pool = FALSE;
6233 __kmp_reap_thread(thread, 0);
6235 __kmp_thread_pool_insert_pt = NULL;
6238 while (__kmp_team_pool != NULL) {
6240 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6241 __kmp_team_pool = team->t.t_next_pool;
6243 team->t.t_next_pool = NULL;
6244 __kmp_reap_team(team);
6247 __kmp_reap_task_teams();
6254 for (i = 0; i < __kmp_threads_capacity; i++) {
6255 kmp_info_t *thr = __kmp_threads[i];
6256 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6261 for (i = 0; i < __kmp_threads_capacity; ++i) {
6268 TCW_SYNC_4(__kmp_init_common, FALSE);
  KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
  __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
  if (TCR_4(__kmp_init_monitor)) {
    __kmp_reap_monitor(&__kmp_monitor);
    TCW_4(__kmp_init_monitor, 0);
  }
  __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6287 TCW_4(__kmp_init_gtid, FALSE);
void __kmp_internal_end_library(int gtid_req) {
  /* if we have already cleaned up, don't try again; it wouldn't be pretty */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  // If the hidden helper team has been initialized, we need to deinit it.
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work.
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed.
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shut down the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread; gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        __kmp_unregister_library();
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* worker threads may call this function through the atexit handler if
         they call exit(); just dump the debug buffer and unregister. */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      // Unregister the library here as well so the shm file is cleaned up.
      __kmp_unregister_library();
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();
}
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* if we have already cleaned up, don't try again; it wouldn't be pretty */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  // If the hidden helper team has been initialized, we need to deinit it.
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work.
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed.
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread; gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread, let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      if (gtid >= 0) {
        __kmp_threads[gtid]->th.th_task_team = NULL;
      }

      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
      return;
    }
  }
#if KMP_DYNAMIC_LIB
  if (__kmp_pause_status != kmp_hard_paused)
  // Do not shut down the library at the exit of an uber thread; it is better
  // to shut down later in the library destructor.
  {
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
    return;
  }
#endif
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* this lock orders our read of __kmp_threads_capacity against writes in
     __kmp_register_root */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
}
// -----------------------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
/* If linked statically, getpid() may return different values in different
   threads; macOS* complains about the name being too long with an additional
   getuid(). */
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
} // __kmp_reg_status_name

void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the environment variable.
  int done = 0;
  union {
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
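  /* Illustrative note (not part of the original source): the registration
     value has the form "<address>-<flag>-<library file>", e.g.
     "0x7f12a4c0-cafe0042-libomp.so".  A starting runtime publishes it under
     the __KMP_REGISTERED_LIB_<pid> name (environment variable or POSIX shared
     memory), reads the name back, and if the stored address/flag pair still
     belongs to a live, mapped copy of a runtime it concludes that another
     OpenMP runtime is already loaded into the process. */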
  while (!done) {
    char *value = NULL; // Actual value of the environment variable.

#if defined(KMP_USE_SHM)
    char *shm_name = __kmp_str_format("/%s", name);
    int shm_preexist = 0;
    char *data1;
    int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
    if ((fd1 == -1) && (errno == EEXIST)) {
      // file didn't open because it already exists; try the existing file
      fd1 = shm_open(shm_name, O_RDWR, 0666);
      if (fd1 == -1) { // file didn't get opened
        __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0),
                    __kmp_msg_null);
      } else {
        // able to open the existing file
        shm_preexist = 1;
      }
    } else if (fd1 == -1) { // SHM didn't open for some other reason
      __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2"), KMP_ERR(errno),
                  __kmp_msg_null);
    }
    if (shm_preexist == 0) {
      // we created the SHM, now set its size
      if (ftruncate(fd1, SHM_SIZE) == -1) {
        __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"),
                    KMP_ERR(errno), __kmp_msg_null);
      }
    }
    data1 =
        (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
    if (data1 == MAP_FAILED) {
      __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno),
                  __kmp_msg_null);
    }
    if (shm_preexist == 0) { // write our registration string into the SHM
      KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
    }
    // Read the value from either what we just wrote or the existing file.
    value = __kmp_str_format("%s", data1);
    munmap(data1, SHM_SIZE);
    close(fd1);
#else // Windows and unix with static library
    // Set the environment variable, but do not overwrite if it exists.
    __kmp_env_set(name, __kmp_registration_str, 0);
    // read the value to see whether it got set
    value = __kmp_env_get(name);
#endif

    if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
      done = 1; // registration succeeded, exit the loop
    } else {
      // Write failed: another copy of the OpenMP RTL is in memory.
      // Check whether it is alive or dead.
      int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
      char *tail = value;
      char *flag_addr_str = NULL;
      char *flag_val_str = NULL;
      char const *file_name = NULL;
      __kmp_str_split(tail, '-', &flag_addr_str, &tail);
      __kmp_str_split(tail, '-', &flag_val_str, &tail);
      file_name = tail;
      if (tail != NULL) {
        unsigned long *flag_addr = 0;
        unsigned long flag_val = 0;
        KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
        KMP_SSCANF(flag_val_str, "%lx", &flag_val);
        if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
          // Check whether the encoded address is mapped into the address
          // space; if so, dereference it to see if it still holds the flag.
          if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
            neighbor = 1;
          } else {
            // The other copy of the library is no longer running.
            neighbor = 2;
          }
        }
      }
      switch (neighbor) {
      case 0: // Cannot parse the value -- neighbor status unknown; assume the
        // other library is alive.
        file_name = "unknown library";
        KMP_FALLTHROUGH();
      case 1: { // Neighbor is alive: check whether duplicates are allowed.
        char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
        if (!__kmp_str_match_true(duplicate_ok)) {
          // Not allowed -- issue a fatal error.
          __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
                      KMP_HNT(DuplicateLibrary), __kmp_msg_null);
        }
        KMP_INTERNAL_FREE(duplicate_ok);
        __kmp_duplicate_library_ok = 1;
        done = 1; // exit the loop
      } break;
      case 2: { // Neighbor is dead.
#if defined(KMP_USE_SHM)
        // close shared memory; this removes the file in /dev/shm
        shm_unlink(shm_name);
#else
        // Clear the variable and try to register the library again.
        __kmp_env_unset(name);
#endif
      } break;
      default: {
        KMP_DEBUG_ASSERT(0);
      } break;
      }
    }
    KMP_INTERNAL_FREE((void *)value);
#if defined(KMP_USE_SHM)
    KMP_INTERNAL_FREE((void *)shm_name);
#endif
  } // while
  KMP_INTERNAL_FREE((void *)name);

} // func __kmp_register_library_startup
void __kmp_unregister_library(void) {

  char *name = __kmp_reg_status_name();
  char *value = NULL;

#if defined(KMP_USE_SHM)
  char *shm_name = __kmp_str_format("/%s", name);
  int fd1 = shm_open(shm_name, O_RDONLY, 0666);
  if (fd1 == -1) {
    // file did not open; nothing to unregister
    return;
  }
  char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
  if (data1 != MAP_FAILED) {
    value = __kmp_str_format("%s", data1); // read value from SHM
    munmap(data1, SHM_SIZE);
  }
  close(fd1);
#else
  value = __kmp_env_get(name);
#endif

  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
//  Ok, this is our variable. Delete it.
#if defined(KMP_USE_SHM)
    shm_unlink(shm_name); // this removes the file in /dev/shm
#else
    __kmp_env_unset(name);
#endif
  }

#if defined(KMP_USE_SHM)
  KMP_INTERNAL_FREE(shm_name);
#endif

  KMP_INTERNAL_FREE(__kmp_registration_str);
  KMP_INTERNAL_FREE(value);
  KMP_INTERNAL_FREE(name);

  __kmp_registration_flag = 0;
  __kmp_registration_str = NULL;
} // __kmp_unregister_library
// End of Library registration stuff.
// -----------------------------------------------------------------------------

#if KMP_MIC_SUPPORTED

static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment.
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */

#if KMP_HAVE_UMWAIT
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_umwait_enabled = ((buf.ecx >> 5) & 1) && __kmp_user_level_mwait;
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
// Will be replaced with the correct value when we know that.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// getauxval() is available in RHEL7 and SLES12. If the RTL is built on an
// earlier OS, the following weak internal stub is used when the real entry is
// not found at run time.
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

static void __kmp_user_level_mwait_init() {
  // When getauxval() and the correct AT_INTELPHIUSERMWAIT value are
  // available, use them to find whether user-level mwait is enabled;
  // otherwise honor KMP_USER_LEVEL_MWAIT on Intel MIC.
  if (__kmp_mic_type == mic3) {
    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
    if ((res & 0x1) || __kmp_user_level_mwait) {
      __kmp_mwait_enabled = TRUE;
      if (__kmp_user_level_mwait) {
        KMP_INFORM(EnvMwaitWarn);
      }
    } else {
      __kmp_mwait_enabled = FALSE;
    }
  }
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

  __kmp_validate_locks();

  /* Initialize the internal memory allocator */
  __kmp_init_allocator();

  /* Register the library startup and check whether another copy of the
     library is already registered. */
  __kmp_register_library_startup();

  /* TODO reinitialization of the library */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
  }

  __kmp_global.g.g_abort = 0;
  TCW_SYNC_4(__kmp_global.g.g_done, FALSE);

/* initialize the locks */
#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_init_speculative_stats();
#endif
#endif
#if KMP_STATS_ENABLED
  __kmp_stats_init();
#endif
  __kmp_init_lock(&__kmp_global_lock);
  __kmp_init_queuing_lock(&__kmp_dispatch_lock);
  __kmp_init_lock(&__kmp_debug_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
  __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_init_bootstrap_lock(&__kmp_exit_lock);
#if KMP_USE_MONITOR
  __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
#endif
  __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
  /* conduct initialization and initial setup of configuration */
  __kmp_runtime_initialize();

#if KMP_MIC_SUPPORTED
  __kmp_check_mic_type();
#endif

  // Some global variable initialization moved here from kmp_env_initialize()
  __kmp_abort_delay = 0;

  /* assume the entire machine will be used */
  __kmp_dflt_team_nth_ub = __kmp_xproc;
  if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
    __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
  }
  __kmp_max_nth = __kmp_sys_max_nth;
  __kmp_cg_max_nth = __kmp_sys_max_nth;
  __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
  if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
    __kmp_teams_max_nth = __kmp_sys_max_nth;
  }

  // "KMP_BLOCKTIME" defaults
  __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
#if KMP_USE_MONITOR
  __kmp_monitor_wakeups =
      KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
  __kmp_bt_intervals =
      KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
#endif
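  /* Illustrative note (not part of the original source): the two macros above
     only convert units.  With the default 200 ms blocktime and a monitor that
     wakes, say, every 10 ms, a spinning thread is allowed roughly
     200 / 10 = 20 monitor intervals of busy-waiting before it is put to
     sleep. */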
  // "KMP_LIBRARY" default
  __kmp_library = library_throughput;
  /* scheduling defaults */
  __kmp_static = kmp_sch_static_balanced;

/* Barrier method defaults, settable via the environment */
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
#endif // KMP_FAST_REDUCTION_BARRIER
  for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
    __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
    __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
    __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
    __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
#if KMP_FAST_REDUCTION_BARRIER
    if (i == bs_reduction_barrier) {
      __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
      __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
      __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
      __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
    }
#endif // KMP_FAST_REDUCTION_BARRIER
  }
#if KMP_FAST_REDUCTION_BARRIER
#undef kmp_reduction_barrier_release_pat
#undef kmp_reduction_barrier_gather_pat
#undef kmp_reduction_barrier_release_bb
#undef kmp_reduction_barrier_gather_bb
#endif // KMP_FAST_REDUCTION_BARRIER
#if KMP_MIC_SUPPORTED
  if (__kmp_mic_type == mic2) { // KNC
    // plain=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
    __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
    __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
        1; // forkjoin release
    __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
  }
#if KMP_FAST_REDUCTION_BARRIER
  if (__kmp_mic_type == mic2) { // KNC
    __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
  }
#endif // KMP_FAST_REDUCTION_BARRIER
#endif // KMP_MIC_SUPPORTED
// From "KMP_CHECKS" initialization
#ifdef KMP_DEBUG
  __kmp_env_checks = TRUE; /* development versions have the extra checks */
#else
  __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
#endif

  // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
  __kmp_foreign_tp = TRUE;

  __kmp_global.g.g_dynamic = FALSE;
  __kmp_global.g.g_dynamic_mode = dynamic_default;

  __kmp_init_nesting_mode();

  __kmp_env_initialize(NULL);

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  __kmp_user_level_mwait_init();
#endif
// Print all messages in the message catalog for testing purposes.
#ifdef KMP_DEBUG
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);
#endif

  __kmp_threads_capacity =
      __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
  // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
  __kmp_tp_capacity = __kmp_default_tp_capacity(
      __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);

  // If the library is shut down properly, both pools must be NULL. Just in
  // case, set them to NULL.
  KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
  KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
  KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Allocate all of the variable-sized records.  __kmp_threads_capacity
     entries are allocated, but the arrays are expandable.  Allocation is
     cache-aligned, so add padding for the gap. */
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
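  /* Illustrative note (not part of the original source): __kmp_threads and
     __kmp_root share one cache-aligned allocation -- capacity slots for
     thread pointers first, then capacity slots for root pointers -- so
     __kmp_root simply points sizeof(kmp_info_t *) * __kmp_threads_capacity
     bytes into the same block. */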
  /* init thread counts */
  KMP_DEBUG_ASSERT(__kmp_all_nth == 0);
  KMP_DEBUG_ASSERT(__kmp_nth == 0);

  /* setup the uber master thread and hierarchy */
  gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize  T#%d\n", gtid));
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(KMP_INITIAL_GTID(gtid));

  /* initialize the common data */
  __kmp_common_initialize();

  /* register atfork and atexit handlers, install signal handlers */
  __kmp_register_atfork();

#if !KMP_DYNAMIC_LIB
  {
    /* Invoke the library termination when the program exits */
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
  }
#endif

#if KMP_HANDLE_SIGNALS
#if KMP_OS_UNIX
  /* must be called before users install their own signal handlers */
  __kmp_install_signals(FALSE);
#else
  __kmp_install_signals(TRUE);
#endif
#endif

  /* we have finished the serial initialization */
  __kmp_init_counter++;
  __kmp_init_serial = TRUE;

  if (__kmp_settings) {
    __kmp_env_print();
  }
  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }

  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;

  if (!__kmp_init_serial) {
    __kmp_do_serial_initialize();
  }

  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));

  // Save the previous value of __kmp_dflt_team_nth so we can avoid some
  // reinitialization if it hasn't changed.
  prev_dflt_team_nth = __kmp_dflt_team_nth;

#if KMP_AFFINITY_SUPPORTED
  // __kmp_affinity_initialize() will try to set __kmp_ncores to the number of
  // cores on the machine.
  __kmp_affinity_initialize();
#endif /* KMP_AFFINITY_SUPPORTED */

  KMP_ASSERT(__kmp_xproc > 0);
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in the num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now.
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  if (__kmp_nesting_mode > 0)
    __kmp_set_nesting_mode_threads();

  // There's no harm in continuing if the following check fails, but it
  // indicates an error in the previous logic.
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    // Run through the __kmp_threads array and set the num-threads icv for each
    // registered root thread that has not explicitly set its nthreads-var.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary, now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}

void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for siblings) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* TODO reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* __kmp_initz_lock is already held; calling __kmp_serial_initialize would
     deadlock, so call __kmp_do_middle_initialize directly. */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  __kmp_resume_if_hard_paused();

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs; worker threads will set theirs to these values
  // at thread startup.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_UNIX
#if KMP_HANDLE_SIGNALS
  /* must be after __kmp_serial_initialize */
  __kmp_install_signals(TRUE);
#endif
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before we initialize hidden helper.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check; note that this must not be placed before
  // __kmp_parallel_initialize as it would cause a deadlock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  // Set the count of hidden helper tasks to be executed to zero.
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Indicate that we're initializing the hidden helper team/threads.
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform-independent initialization.
  __kmp_do_initialize_hidden_helper_threads();

  // Wait for the hidden helper teams to finish initializing.
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization.
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* ------------------------------------------------------------------------ */

void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs yet */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in a teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root.  Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to thread limit stored when league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch league of teams now, but not let workers execute
// (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // The final argument eliminates the join barrier, which cannot be used here
  // because the worker threads are waiting in a fork barrier for more parallel
  // regions
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);

  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }

  __kmp_teams_master(gtid);
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
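// Illustrative note (not part of the runtime): a construct such as
//
//   #pragma omp teams num_teams(4) thread_limit(8)
//   { /* ... */ }
//
// is outlined by the compiler and typically enters the runtime through
// __kmpc_fork_teams(); each league primary thread then runs
// __kmp_invoke_teams_master() above, which calls __kmp_teams_master() to fork
// the actual league. The clause values are recorded beforehand through
// __kmp_push_num_teams()/__kmp_push_thread_limit() further below.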
/* Set the requested number of threads for the next parallel region
   encountered by this team. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
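// Illustrative sketch (not part of the runtime): for a directive such as
//
//   #pragma omp parallel num_threads(n)
//
// the compiler typically emits a call to the __kmpc_push_num_threads() entry
// point immediately before __kmpc_fork_call(); that entry point forwards here,
// so the request is just a one-shot hint stored in th_set_nproc and consumed
// by the next fork.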
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // Adjust num_threads without a warning as it is not a user setting:
    // num_threads = min(num_threads, nthreads-var, thread-limit-var).
    // No thread_limit clause specified - do not change thread-limit-var ICV.
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    // This thread will be the primary thread of the league primary threads.
    // Store the new thread limit; the old limit is saved in the th_cg_roots
    // list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
/* Set the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
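// Illustrative sketch (not part of the runtime): clauses on a teams directive,
// e.g.
//
//   #pragma omp teams num_teams(8) thread_limit(4)
//
// are usually lowered to a __kmpc_push_num_teams() call that forwards to
// __kmp_push_num_teams() above; the thread_limit part is then folded into the
// thread-limit ICV by __kmp_push_thread_limit().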
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // exact number of teams required
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads == 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
// Set the proc_bind variable to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
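// Illustrative sketch (not part of the runtime): a proc_bind clause, e.g.
//
//   #pragma omp parallel proc_bind(close)
//
// is commonly lowered to a __kmpc_push_proc_bind() call that forwards here;
// like th_set_nproc, th_set_proc_bind is a one-shot request consumed by the
// next fork.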
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread zero enters the ordered section first */

  /* Reset the identifiers on the dispatch buffers */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    for (int i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (int f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* Release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);

  __kmp_join_barrier(gtid); /* wait for everyone */

  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;

    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }

    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }

  KMP_ASSERT(this_thr->th.th_team == team);
}
#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism.  Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // don't count the primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads in the pool, threads active in the hot team, and the calling
  // thread (the future primary thread) all contribute to the system load.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once g_dynamic_mode is set to
    // dynamic_thread_limit, we should not wind up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // The load balance algorithm is slightly delayed in detecting newly running
  // procs, so the observed load should be at least the threads accounted for
  // above.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
} // __kmp_load_balance_nproc()

#endif /* USE_LOAD_BALANCE */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in the serial section of the
                                  top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
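// Illustrative sketch (not part of the runtime): user code usually reaches
// __kmp_user_set_library() through the kmp_set_library* extension entry
// points, e.g.
//
//   #include <omp.h>
//   extern "C" void kmp_set_library_throughput(void); // assumed prototype
//   int main() {
//     kmp_set_library_throughput(); // must be called outside parallel regions
//   #pragma omp parallel
//     { /* ... */ }
//   }
//
// The extern declaration above is shown only for illustration; the actual
// prototypes are provided by the runtime's public headers.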
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  /* Round the argument up to a multiple of the 0x1000 page size. */
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }

  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = 200;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
// Get team information common to all team API; returns NULL if the calling
// thread is not inside a teams construct.
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 0; // teams region is serialized (one team of one thread)
    return team->t.t_master_tid;
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 1;
    return team->t.t_parent->t.t_nproc;
  }
  return 1;
}
// Field descriptors used by the affinity-format expansion below.
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L, N, A, etc.
  const char *long_name; // from spec e.g., thread_affinity, num_threads, etc.
  char field_format; // snprintf conversion ('d' for integer, 's' for string)
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
// Expand one "%..." field of an affinity format string into field_buffer.
// Returns the number of characters it takes to hold the field.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // If an implementation does not have info for the field type, then
    // "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Return the number of characters needed to hold the affinity string
// (not including the null byte); the resulting string is printed to buffer.
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, then we use the
  // affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put the field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put a literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
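// Illustrative sketch (not part of the runtime): the two routines above back
// the OpenMP 5.0 affinity-format API. Assuming a valid gtid and the
// affinity-format ICV set to e.g. "tid %n bound to %A", something like
//
//   kmp_str_buf_t buf;
//   __kmp_str_buf_init(&buf);
//   size_t n = __kmp_aux_capture_affinity(gtid, NULL, &buf); // NULL => ICV
//   // buf.str now holds the expanded string, n is its length
//   __kmp_str_buf_free(&buf);
//
// is roughly what omp_capture_affinity()/omp_display_affinity() reduce to.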
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime has been set explicitly */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
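// Illustrative sketch (not part of the runtime): the blocktime set here is
// what the KMP_BLOCKTIME environment variable and the kmp_set_blocktime()
// extension control, e.g.
//
//   extern "C" void kmp_set_blocktime(int msec); // assumed prototype
//   ...
//   kmp_set_blocktime(0); // let idle workers sleep right after a region ends
//
// The value is clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] above and is
// applied to both the calling thread's team and its serial team.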
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct (lck != NULL).
  // If (reduce_data != NULL && reduce_func != NULL): the tree-reduction method
  // can be selected by the RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE: the atomic reduce method
  // can be selected by the RTL.
  // Finally, it is up to the OpenMP RTL to pick among the methods generated by
  // the compiler.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION
  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // unsupported method specified
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
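// Illustrative sketch (not part of the runtime): the __kmpc_reduce() /
// __kmpc_reduce_nowait() entry points (see kmp_csupport.cpp) consult this
// routine when a compiler-generated reduction arrives, roughly:
//
//   PACKED_REDUCTION_METHOD_T method = __kmp_determine_reduction_method(
//       loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
//   if (method == critical_reduce_block) {
//     // guard the combiner with the named critical section (lck)
//   } else if (method == atomic_reduce_block) {
//     // tell the compiled code to take its atomic update path
//   } // tree methods instead use the (reduction) barrier machinery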
// this function is for testing the set/get/determine reduce method machinery
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
// Soft pause sets up threads to ignore blocktime and just go to sleep.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely; resume happens naturally when
// OpenMP is used again.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// Called via __kmpc_pause_resource. Returns 0 if successful.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      return 1; // runtime is not paused, so cannot resume
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  } else {
    return 1; // invalid level
  }
}
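// Illustrative example (not part of the runtime): this is the backend of the
// OpenMP 5.0 pause API, reached through __kmpc_pause_resource(), e.g.
//
//   #include <omp.h>
//   // release runtime resources between two widely separated parallel phases
//   if (omp_pause_resource_all(omp_pause_soft) != 0) {
//     // a non-zero return means the request could not be honored
//   }
//
// A soft pause puts the workers to sleep; a hard pause additionally tears the
// runtime down via __kmp_hard_pause() above.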
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
// The team size is changing, so the distributed barrier must be modified
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust the
  // size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If a thread is still transitioning into the team, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in the team now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition the thread to the unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  kmp_uint64 new_value;
  new_value = team->t.b->go_release();

  // Workers should see transition status 2 and move to 0; but they may need to
  // be woken up first
  size_t my_go_index;
  int count = old_nthreads - 1;
  while (count > 0) { // wait for all workers to leave the team
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      my_go_index = f / team->t.b->threads_per_go;
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
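// Note on th_used_in_team (as used above and in __kmp_add_threads_to_team
// below): the field acts as a small per-worker state machine --
//   0 = not part of the team, 1 = in the team and waiting in the barrier,
//   2 = asked to leave the team (resize in progress),
//   3 = asked to (re)join the team.
// __kmp_resize_dist_barrier() drives workers 1 -> 2 -> 0, while
// __kmp_add_threads_to_team() drives them 0 -> 3 -> 1.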
// Add the threads back to the team
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  KMP_DEBUG_ASSERT(team);
  // Threads were paused and pointed at th_used_in_team temporarily during a
  // resize of the team. Setting th_used_in_team to 3 tells each thread to
  // transition itself back into the team; if blocktime is not infinite, the
  // thread could be sleeping, so we also send a resume.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // When the threads have transitioned into the team they set th_used_in_team
  // to 1; this loop makes the primary thread wait until all of them have done
  // so.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}
// Globals and functions for hidden helper threads
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads, in case
  // a regular thread pushes a hidden helper task to a hidden helper thread
  // that has not been awakened since being released by the main thread after
  // team creation.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If this is the main thread of the hidden helper team, release the initial
  // thread, wait for the signal, and then wake up the workers.
  if (__kmpc_master(nullptr, *gtid)) {
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
/* Nesting Mode:
   Set via KMP_NESTING_MODE, which takes an integer.
   Note: we skip duplicate topology levels and levels with only one entity. */

// Allocate space to store nesting levels
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
// Set the number of threads for the top levels of nesting; must be called
// after the topology has been determined.
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--; // skip levels with only one entity
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) { // don't lose the core level when
      // doing fine-grain nesting
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable estimate
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]); // commit to the first level
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max_active_levels was set, make the nesting mode levels match
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // no user-specified max_active_levels
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
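// Illustrative example (not part of the runtime): with KMP_NESTING_MODE set,
// the per-level counts computed above pre-populate the nested nthreads list,
// so a topology-aware nested region such as
//
//   #pragma omp parallel   // e.g. one thread per core
//   #pragma omp parallel   // e.g. one thread per hardware thread
//   { /* ... */ }
//
// draws its team sizes from __kmp_nesting_nth_level instead of requiring an
// explicit OMP_NUM_THREADS=<outer>,<inner> setting.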