14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
21#include "kmp_settings.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
28#include "kmp_dispatch_hier.h"
32#include "ompt-specific.h"
35#include "ompd-specific.h"
38#if OMP_PROFILING_SUPPORT
39#include "llvm/Support/TimeProfiler.h"
40static char *ProfileTraceFile =
nullptr;
44#define KMP_USE_PRCTL 0
59#if defined(KMP_GOMP_COMPAT)
60char const __kmp_version_alt_comp[] =
61 KMP_VERSION_PREFIX
"alternative compiler support: yes";
64char const __kmp_version_omp_api[] =
65 KMP_VERSION_PREFIX
"API version: 5.0 (201611)";
68char const __kmp_version_lock[] =
69 KMP_VERSION_PREFIX
"lock type: run time selectable";
72#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
77kmp_info_t __kmp_monitor;
82void __kmp_cleanup(
void);
static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
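
/* __kmp_get_global_thread_id: determine the gtid of the calling thread without
   relying on registered TLS. Depending on __kmp_gtid_mode it uses thread-local
   data, keyed TLS, or the internal algorithm that locates the address of a
   local variable within the recorded stack extent of each registered thread
   (refining the recorded stack base/size as a side effect). */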
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))

  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));

  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();

  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {

  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));

  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();

    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();

  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);
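
/* __kmp_check_stack_overlap: optionally print the thread's stack extent to the
   storage map and, when extensive environment checks are enabled, verify that
   this thread's stack does not overlap the stack recorded for any other
   registered thread; an overlap is reported as a fatal error. */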
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
void __kmp_infinite_loop(void) {
  static int done = FALSE;
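
/* __kmp_print_storage_map_gtid: formatted "OMP storage map" output for a block
   [p1, p2) of the given size, emitted under the stdio bootstrap lock. With
   KMP_PRINT_DATA_PLACEMENT it additionally reports the NUMA node that backs
   each page in the range. MAX_MESSAGE bounds the formatted buffer. */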
#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);

        __kmp_storage_map_verbose = FALSE;

        int localProc = __kmp_get_cpu_from_gtid(gtid);

        const int page_size = KMP_GET_PAGE_SIZE();

        p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
        p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));

          __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,

          __kmp_printf_no_lock(" GTID %d\n", gtid);

            (char *&)p1 += page_size;
          } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
          __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
                               lastNode);

      __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                           (char *)p1 + (page_size - 1),
                           __kmp_get_host_node(p1));

        __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                             (char *)p2 + (page_size - 1),
                             __kmp_get_host_node(p2));

  } else
    __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));
#endif /* KMP_PRINT_DATA_PLACEMENT */

  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
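
/* __kmp_warn: print a runtime warning to kmp_err unless warnings are disabled
   (__kmp_generate_warnings == kmp_warnings_off). The message is formatted and
   written under the stdio bootstrap lock. */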
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
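
/* __kmp_abort_process: terminate the process after an internal error. Takes
   the exit lock, dumps the debug buffer if enabled, flags abnormal termination
   for other threads, and unregisters the library before aborting. */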
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock if
    // the abort happened during library initialization or shutdown.
    __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
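
/* Storage-map dumps: when storage mapping is enabled these helpers print the
   memory layout of a thread descriptor and of a team (barriers, dispatch
   buffers, thread pointer array) for debugging data placement. */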
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}

static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
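
/* DllMain: Windows-specific entry point for the dynamic library build. On
   process or thread detach it shuts down the runtime (or just the detaching
   thread) so resources are reclaimed when libomp is unloaded. */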
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // lpReserved is NULL when the DLL is being unloaded via FreeLibrary rather
    // than at process termination.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
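
/* __kmp_parallel_deo / __kmp_parallel_dxo implement entry to and exit from an
   "ordered" region: with BUILD_PARALLEL_ORDERED each thread waits for
   t_ordered.dt.t_value to reach its own tid and passes the token to the next
   tid on exit. Consistency-check records are pushed/popped when enabled. */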
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    // Pass the ordered token to the next thread in the team.
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
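
/* __kmp_enter_single: claim a "single" construct for the calling thread. In a
   serialized team the thread trivially wins; otherwise the winner is the
   thread whose atomic compare-and-store moves t_construct past its previous
   value. A workshare record is pushed/checked when consistency checking is
   enabled, and the nonzero status marks the winning thread. */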
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {

    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;

    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }

    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      __kmp_itt_metadata_single(id_ref);
    }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }

    __kmp_itt_single_start(gtid);
void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
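
/* __kmp_reserve_threads: decide how many threads a new parallel region may
   actually get. Starting from the requested set_nthreads it applies the
   dynamic adjustment mode (load balance, thread limit, or random), then the
   device and contention-group thread limits, and finally the capacity of the
   __kmp_threads array (expanding it if possible). Returns the number of
   threads to use; 1 means the region will be serialized. */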
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  new_nthreads = set_nthreads;
740 if (!get__dynamic_2(parent_team, master_tid)) {
743#ifdef USE_LOAD_BALANCE
744 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
745 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
746 if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));

    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
759 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
760 new_nthreads = __kmp_avail_proc - __kmp_nth +
761 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
762 if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));

    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
776 if (set_nthreads > 2) {
777 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
778 new_nthreads = (new_nthreads % set_nthreads) + 1;
779 if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));

      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
796 if (__kmp_nth + new_nthreads -
797 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
799 int tl_nthreads = __kmp_max_nth - __kmp_nth +
800 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
801 if (tl_nthreads <= 0) {
806 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
807 __kmp_reserve_warn = 1;
808 __kmp_msg(kmp_ms_warning,
809 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
810 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
812 if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));

    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
821 new_nthreads = tl_nthreads;
825 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
826 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
827 if (cg_nthreads + new_nthreads -
828 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
830 int tl_nthreads = max_cg_threads - cg_nthreads +
831 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
832 if (tl_nthreads <= 0) {
837 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
838 __kmp_reserve_warn = 1;
839 __kmp_msg(kmp_ms_warning,
840 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
841 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
843 if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));

    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
852 new_nthreads = tl_nthreads;
858 capacity = __kmp_threads_capacity;
859 if (TCR_PTR(__kmp_threads[0]) == NULL) {
865 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
866 capacity -= __kmp_hidden_helper_threads_num;
868 if (__kmp_nth + new_nthreads -
869 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
872 int slotsRequired = __kmp_nth + new_nthreads -
873 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
875 int slotsAdded = __kmp_expand_threads(slotsRequired);
876 if (slotsAdded < slotsRequired) {
878 new_nthreads -= (slotsRequired - slotsAdded);
879 KMP_ASSERT(new_nthreads >= 1);
882 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
883 __kmp_reserve_warn = 1;
884 if (__kmp_tp_cached) {
885 __kmp_msg(kmp_ms_warning,
886 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
887 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
888 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
890 __kmp_msg(kmp_ms_warning,
891 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
892 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
  return new_nthreads;
}
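
/* __kmp_fork_team_threads: bind the primary thread to slot 0 of the new team,
   allocate or reuse workers for slots 1..nproc-1, seed their barrier state
   from the team, and (with affinity support) partition places among the
   workers. */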
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
923 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
927 master_th->th.th_info.ds.ds_tid = 0;
928 master_th->th.th_team = team;
929 master_th->th.th_team_nproc = team->t.t_nproc;
930 master_th->th.th_team_master = master_th;
931 master_th->th.th_team_serialized = FALSE;
932 master_th->th.th_dispatch = &team->t.t_dispatch[0];
935#if KMP_NESTED_HOT_TEAMS
937 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
940 int level = team->t.t_active_level - 1;
941 if (master_th->th.th_teams_microtask) {
942 if (master_th->th.th_teams_size.nteams > 1) {
946 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
947 master_th->th.th_teams_level == team->t.t_level) {
952 if (level < __kmp_hot_teams_max_level) {
953 if (hot_teams[level].hot_team) {
955 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
959 hot_teams[level].hot_team = team;
960 hot_teams[level].hot_team_nth = team->t.t_nproc;
967 use_hot_team = team == root->r.r_hot_team;
972 team->t.t_threads[0] = master_th;
973 __kmp_initialize_info(master_th, team, 0, master_gtid);
976 for (i = 1; i < team->t.t_nproc; i++) {
979 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
980 team->t.t_threads[i] = thr;
981 KMP_DEBUG_ASSERT(thr);
982 KMP_DEBUG_ASSERT(thr->th.th_team == team);
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
990 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
991 thr->th.th_teams_level = master_th->th.th_teams_level;
992 thr->th.th_teams_size = master_th->th.th_teams_size;
995 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
996 for (b = 0; b < bs_last_barrier; ++b) {
997 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
998 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
1000 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1006#if KMP_AFFINITY_SUPPORTED
1010 if (!fork_teams_workers) {
1011 __kmp_partition_places(team);
1015 if (team->t.t_nproc > 1 &&
1016 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1017 team->t.b->update_num_threads(team->t.t_nproc);
1018 __kmp_add_threads_to_team(team, team->t.t_nproc);
1022 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1023 for (i = 0; i < team->t.t_nproc; i++) {
1024 kmp_info_t *thr = team->t.t_threads[i];
1025 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1026 thr->th.th_prev_level != team->t.t_level) {
1027 team->t.t_display_affinity = 1;
1036#if KMP_ARCH_X86 || KMP_ARCH_X86_64
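// propagateFPControl / updateHWFPControl: when __kmp_inherit_fp_control is
// enabled, the primary thread captures its x87 control word and MXCSR into the
// team so workers run with the same floating-point environment, and restores
// the hardware state from the team after the region if it has drifted.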
1040inline static void propagateFPControl(kmp_team_t *team) {
1041 if (__kmp_inherit_fp_control) {
1042 kmp_int16 x87_fpu_control_word;
1046 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1047 __kmp_store_mxcsr(&mxcsr);
1048 mxcsr &= KMP_X86_MXCSR_MASK;
1059 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1060 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1063 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1067 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1073inline static void updateHWFPControl(kmp_team_t *team) {
1074 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1077 kmp_int16 x87_fpu_control_word;
1079 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1080 __kmp_store_mxcsr(&mxcsr);
1081 mxcsr &= KMP_X86_MXCSR_MASK;
1083 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1084 __kmp_clear_x87_fpu_status_word();
1085 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1088 if (team->t.t_mxcsr != mxcsr) {
1089 __kmp_load_mxcsr(&team->t.t_mxcsr);
1094#define propagateFPControl(x) ((void)0)
1095#define updateHWFPControl(x) ((void)0)
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc);
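
/* __kmp_serialized_parallel: execute a parallel region serially on the calling
   thread. The thread switches to (or allocates) its serial team, bumps the
   serialization count and nesting level, sets up a dispatch buffer and ICVs
   for the inner region, and reports the region to OMPT when enabled. */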
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1114 if (!TCR_4(__kmp_init_parallel))
1115 __kmp_parallel_initialize();
1116 __kmp_resume_if_soft_paused();
1118 this_thr = __kmp_threads[global_tid];
1119 serial_team = this_thr->th.th_serial_team;
1122 KMP_DEBUG_ASSERT(serial_team);
1125 if (__kmp_tasking_mode != tskm_immediate_exec) {
1127 this_thr->th.th_task_team ==
1128 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1129 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1134 this_thr->th.th_task_team = NULL;
1137 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1138 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1139 proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
1143 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1146 this_thr->th.th_set_proc_bind = proc_bind_default;
1149 ompt_data_t ompt_parallel_data = ompt_data_none;
1150 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1151 if (ompt_enabled.enabled &&
1152 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1154 ompt_task_info_t *parent_task_info;
1155 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1157 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1158 if (ompt_enabled.ompt_callback_parallel_begin) {
1161 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1162 &(parent_task_info->task_data), &(parent_task_info->frame),
1163 &ompt_parallel_data, team_size,
1164 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1169 if (this_thr->th.th_team != serial_team) {
1171 int level = this_thr->th.th_team->t.t_level;
1173 if (serial_team->t.t_serialized) {
1176 kmp_team_t *new_team;
1178 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1181 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1185 proc_bind, &this_thr->th.th_current_task->td_icvs,
1186 0 USE_NESTED_HOT_ARG(NULL));
1187 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1188 KMP_ASSERT(new_team);
1191 new_team->t.t_threads[0] = this_thr;
1192 new_team->t.t_parent = this_thr->th.th_team;
1193 serial_team = new_team;
1194 this_thr->th.th_serial_team = serial_team;
      KF_TRACE(10,
               ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                global_tid, serial_team));

      KF_TRACE(10,
               ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                global_tid, serial_team));
1212 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1213 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1214 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1215 serial_team->t.t_ident = loc;
1216 serial_team->t.t_serialized = 1;
1217 serial_team->t.t_nproc = 1;
1218 serial_team->t.t_parent = this_thr->th.th_team;
1219 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1220 this_thr->th.th_team = serial_team;
1221 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
1225 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1226 this_thr->th.th_current_task->td_flags.executing = 0;
1228 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1233 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1234 &this_thr->th.th_current_task->td_parent->td_icvs);
1238 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1239 this_thr->th.th_current_task->td_icvs.nproc =
1240 __kmp_nested_nth.nth[level + 1];
1243 if (__kmp_nested_proc_bind.used &&
1244 (level + 1 < __kmp_nested_proc_bind.used)) {
1245 this_thr->th.th_current_task->td_icvs.proc_bind =
1246 __kmp_nested_proc_bind.bind_types[level + 1];
1250 serial_team->t.t_pkfn = (microtask_t)(~0);
1252 this_thr->th.th_info.ds.ds_tid = 0;
1255 this_thr->th.th_team_nproc = 1;
1256 this_thr->th.th_team_master = this_thr;
1257 this_thr->th.th_team_serialized = 1;
1259 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1260 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1261 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;
1263 propagateFPControl(serial_team);
1266 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1267 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1268 serial_team->t.t_dispatch->th_disp_buffer =
1269 (dispatch_private_info_t *)__kmp_allocate(
1270 sizeof(dispatch_private_info_t));
1272 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1279 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1280 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1281 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1282 ++serial_team->t.t_serialized;
1283 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1286 int level = this_thr->th.th_team->t.t_level;
1289 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1290 this_thr->th.th_current_task->td_icvs.nproc =
1291 __kmp_nested_nth.nth[level + 1];
1293 serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));
1299 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1301 dispatch_private_info_t *disp_buffer =
1302 (dispatch_private_info_t *)__kmp_allocate(
1303 sizeof(dispatch_private_info_t));
1304 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1305 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1307 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1311 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1315 if (__kmp_display_affinity) {
1316 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1317 this_thr->th.th_prev_num_threads != 1) {
1319 __kmp_aux_display_affinity(global_tid, NULL);
1320 this_thr->th.th_prev_level = serial_team->t.t_level;
1321 this_thr->th.th_prev_num_threads = 1;
1325 if (__kmp_env_consistency_check)
1326 __kmp_push_parallel(global_tid, NULL);
1328 serial_team->t.ompt_team_info.master_return_address = codeptr;
1329 if (ompt_enabled.enabled &&
1330 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1331 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1332 OMPT_GET_FRAME_ADDRESS(0);
1334 ompt_lw_taskteam_t lw_taskteam;
1335 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1336 &ompt_parallel_data, codeptr);
1338 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1342 if (ompt_enabled.ompt_callback_implicit_task) {
1343 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1344 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1345 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1346 ompt_task_implicit);
1347 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1348 __kmp_tid_from_gtid(global_tid);
1352 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1353 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1354 OMPT_GET_FRAME_ADDRESS(0);
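
/* Helper predicates for __kmp_fork_call: __kmp_is_fork_in_teams detects a fork
   performed by a thread inside a teams construct, and __kmp_is_entering_teams
   detects the fork that enters a teams region. */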
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
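
/* __kmp_fork_in_teams: handle a fork performed from within a teams construct.
   The parent (teams) team is reused: its pkfn/invoker are updated, the worker
   count may be resized to the requested number of threads, and the region is
   either invoked directly (serialized parent) or launched through
   __kmp_internal_fork. */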
static int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
                    ompt_data_t ompt_parallel_data, void *return_address,
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
1398 if (parent_team == master_th->th.th_serial_team) {
1401 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1403 if (call_context == fork_context_gnu) {
1406 parent_team->t.t_serialized--;
1411 parent_team->t.t_pkfn = microtask;
1416 void **exit_frame_p;
1417 ompt_data_t *implicit_task_data;
1418 ompt_lw_taskteam_t lw_taskteam;
1420 if (ompt_enabled.enabled) {
1421 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1422 &ompt_parallel_data, return_address);
1423 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1425 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1429 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1430 if (ompt_enabled.ompt_callback_implicit_task) {
1431 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1432 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1433 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1434 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1438 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1440 exit_frame_p = &dummy;
1446 parent_team->t.t_serialized--;
1449 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1450 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1451 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1460 if (ompt_enabled.enabled) {
1461 *exit_frame_p = NULL;
1462 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1463 if (ompt_enabled.ompt_callback_implicit_task) {
1464 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1465 ompt_scope_end, NULL, implicit_task_data, 1,
1466 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1468 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1469 __ompt_lw_taskteam_unlink(master_th);
1470 if (ompt_enabled.ompt_callback_parallel_end) {
1471 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1472 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1473 OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
1475 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1481 parent_team->t.t_pkfn = microtask;
1482 parent_team->t.t_invoke = invoker;
1483 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1484 parent_team->t.t_active_level++;
1485 parent_team->t.t_level++;
1486 parent_team->t.t_def_allocator = master_th->th.th_def_allocator;
1493 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1496 if (ompt_enabled.enabled) {
1497 ompt_lw_taskteam_t lw_taskteam;
1498 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
1500 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1,
true);
1505 if (master_set_numthreads) {
1506 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1508 kmp_info_t **other_threads = parent_team->t.t_threads;
1511 int old_proc = master_th->th.th_teams_size.nth;
1512 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1513 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1514 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1516 parent_team->t.t_nproc = master_set_numthreads;
1517 for (i = 0; i < master_set_numthreads; ++i) {
1518 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1522 master_th->th.th_set_nproc = 0;
1526 if (__kmp_debugging) {
1527 int nth = __kmp_omp_num_threads(loc);
1529 master_set_numthreads = nth;
1535 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1537 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1538 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1539 proc_bind = proc_bind_false;
1542 if (proc_bind == proc_bind_default) {
1543 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1549 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1550 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1551 master_th->th.th_current_task->td_icvs.proc_bind)) {
1552 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1555 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1557 if (proc_bind_icv != proc_bind_default &&
1558 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1559 kmp_info_t **other_threads = parent_team->t.t_threads;
1560 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1561 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1565 master_th->th.th_set_proc_bind = proc_bind_default;
1567#if USE_ITT_BUILD && USE_ITT_NOTIFY
1568 if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
1570 __kmp_forkjoin_frames_mode == 3 &&
1571 parent_team->t.t_active_level == 1
1572 && master_th->th.th_teams_size.nteams == 1) {
1573 kmp_uint64 tmp_time = __itt_get_timestamp();
1574 master_th->th.th_frame_time = tmp_time;
1575 parent_team->t.t_region_time = tmp_time;
1577 if (__itt_stack_caller_create_ptr) {
1578 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1580 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1583#if KMP_AFFINITY_SUPPORTED
1584 __kmp_partition_places(parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)

  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
static int __kmp_serial_fork_call(ident_t *loc, int gtid,
                                  enum fork_context_e call_context,
                                  kmp_int32 argc, microtask_t microtask,
                                  launch_t invoker, kmp_info_t *master_th,
                                  kmp_team_t *parent_team,
                                  ompt_data_t *ompt_parallel_data,
                                  void **return_address,
                                  ompt_data_t **parent_task_data,
                                  kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
1643 master_th->th.th_serial_team->t.t_pkfn = microtask;
1646 if (call_context == fork_context_intel) {
1648 master_th->th.th_serial_team->t.t_ident = loc;
1651 master_th->th.th_serial_team->t.t_level--;
1656 void **exit_frame_p;
1657 ompt_task_info_t *task_info;
1658 ompt_lw_taskteam_t lw_taskteam;
1660 if (ompt_enabled.enabled) {
1661 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1662 ompt_parallel_data, *return_address);
1664 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1666 task_info = OMPT_CUR_TASK_INFO(master_th);
1667 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1668 if (ompt_enabled.ompt_callback_implicit_task) {
1669 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1670 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1671 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1672 &(task_info->task_data), 1,
1673 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1677 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1679 exit_frame_p = &dummy;
1684 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1685 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1686 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1695 if (ompt_enabled.enabled) {
1696 *exit_frame_p = NULL;
1697 if (ompt_enabled.ompt_callback_implicit_task) {
1698 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1699 ompt_scope_end, NULL, &(task_info->task_data), 1,
1700 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1702 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1703 __ompt_lw_taskteam_unlink(master_th);
1704 if (ompt_enabled.ompt_callback_parallel_end) {
1705 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1706 ompt_parallel_data, *parent_task_data,
1707 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1709 master_th->th.ompt_thread_info.state = ompt_state_overhead;
  } else if (microtask == (microtask_t)__kmp_teams_master) {
1713 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
1714 team = master_th->th.th_team;
1716 team->t.t_invoke = invoker;
1717 __kmp_alloc_argv_entries(argc, team, TRUE);
1718 team->t.t_argc = argc;
    argv = (void **)team->t.t_argv;

    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);
1724 for (i = 0; i < argc; ++i)
1726 argv[i] = parent_team->t.t_argv[i];
1734 if (ompt_enabled.enabled) {
1735 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1736 if (ompt_enabled.ompt_callback_implicit_task) {
1737 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1738 ompt_scope_end, NULL, &(task_info->task_data), 0,
1739 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1741 if (ompt_enabled.ompt_callback_parallel_end) {
1742 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1743 ompt_parallel_data, *parent_task_data,
1744 OMPT_INVOKER(call_context) | ompt_parallel_league,
1747 master_th->th.ompt_thread_info.state = ompt_state_overhead;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);
1758 void **exit_frame_p;
1759 ompt_task_info_t *task_info;
1760 ompt_lw_taskteam_t lw_taskteam;
1761 ompt_data_t *implicit_task_data;
1763 if (ompt_enabled.enabled) {
1764 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1765 ompt_parallel_data, *return_address);
1766 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1768 task_info = OMPT_CUR_TASK_INFO(master_th);
1769 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1772 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1773 if (ompt_enabled.ompt_callback_implicit_task) {
1774 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1775 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1776 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1777 ompt_task_implicit);
1778 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1782 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1784 exit_frame_p = &dummy;
1789 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1790 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1791 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1800 if (ompt_enabled.enabled) {
1801 *exit_frame_p = NULL;
1802 if (ompt_enabled.ompt_callback_implicit_task) {
1803 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1804 ompt_scope_end, NULL, &(task_info->task_data), 1,
1805 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1808 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1809 __ompt_lw_taskteam_unlink(master_th);
1810 if (ompt_enabled.ompt_callback_parallel_end) {
1811 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1812 ompt_parallel_data, *parent_task_data,
1813 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1815 master_th->th.ompt_thread_info.state = ompt_state_overhead;
  } else if (call_context == fork_context_gnu) {
1821 if (ompt_enabled.enabled) {
1822 ompt_lw_taskteam_t lwt;
1823 __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
1826 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
1827 __ompt_lw_taskteam_link(&lwt, master_th, 1);
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
1854 int master_this_cons;
1856 kmp_team_t *parent_team;
1857 kmp_info_t *master_th;
1861 int master_set_numthreads;
1865#if KMP_NESTED_HOT_TEAMS
1866 kmp_hot_team_ptr_t **p_hot_teams;
1869 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1873 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1876 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1878 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1879 __kmp_stkpadding += (short)((kmp_int64)dummy);
1885 if (!TCR_4(__kmp_init_parallel))
1886 __kmp_parallel_initialize();
1887 __kmp_resume_if_soft_paused();
1892 master_th = __kmp_threads[gtid];
1894 parent_team = master_th->th.th_team;
1895 master_tid = master_th->th.th_info.ds.ds_tid;
1896 master_this_cons = master_th->th.th_local.this_construct;
1897 root = master_th->th.th_root;
1898 master_active = root->r.r_active;
1899 master_set_numthreads = master_th->th.th_set_nproc;
1902 ompt_data_t ompt_parallel_data = ompt_data_none;
1903 ompt_data_t *parent_task_data;
1904 ompt_frame_t *ompt_frame;
1905 void *return_address = NULL;
1907 if (ompt_enabled.enabled) {
1908 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1910 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1915 __kmp_assign_root_init_mask();
1918 level = parent_team->t.t_level;
1920 active_level = parent_team->t.t_active_level;
1922 teams_level = master_th->th.th_teams_level;
1923#if KMP_NESTED_HOT_TEAMS
1924 p_hot_teams = &master_th->th.th_hot_teams;
1925 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1926 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1927 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1928 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1930 (*p_hot_teams)[0].hot_team_nth = 1;
1935 if (ompt_enabled.enabled) {
1936 if (ompt_enabled.ompt_callback_parallel_begin) {
1937 int team_size = master_set_numthreads
1938 ? master_set_numthreads
1939 : get__nproc_2(parent_team, master_tid);
1940 int flags = OMPT_INVOKER(call_context) |
1941 ((microtask == (microtask_t)__kmp_teams_master)
1942 ? ompt_parallel_league
1943 : ompt_parallel_team);
1944 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1945 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
1948 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1952 master_th->th.th_ident = loc;
1955 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
1956 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
1957 call_context, microtask, invoker,
1958 master_set_numthreads, level,
1960 ompt_parallel_data, return_address,
1966 if (__kmp_tasking_mode != tskm_immediate_exec) {
1967 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1968 parent_team->t.t_task_team[master_th->th.th_task_state]);
1978 __kmp_is_entering_teams(active_level, level, teams_level, ap);
1979 if ((!enter_teams &&
1980 (parent_team->t.t_active_level >=
1981 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
1982 (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
    nthreads = 1;
  } else {
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);
1994 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1999 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2000 nthreads, enter_teams);
2001 if (nthreads == 1) {
2005 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2009 KMP_DEBUG_ASSERT(nthreads > 0);
2012 master_th->th.th_set_nproc = 0;
2014 if (nthreads == 1) {
2015 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2016 invoker, master_th, parent_team,
2018 &ompt_parallel_data, &return_address,
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
2034 master_th->th.th_current_task->td_flags.executing = 0;
2036 if (!master_th->th.th_teams_microtask || level > teams_level) {
2038 KMP_ATOMIC_INC(&root->r.r_in_parallel);
2042 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2043 if ((level + 1 < __kmp_nested_nth.used) &&
2044 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2045 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2051 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2053 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2054 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2055 proc_bind = proc_bind_false;
2059 if (proc_bind == proc_bind_default) {
2060 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2063 if (master_th->th.th_teams_microtask &&
2064 microtask == (microtask_t)__kmp_teams_master) {
2065 proc_bind = __kmp_teams_proc_bind;
2071 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2072 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2073 master_th->th.th_current_task->td_icvs.proc_bind)) {
2076 if (!master_th->th.th_teams_microtask ||
2077 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2078 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2083 master_th->th.th_set_proc_bind = proc_bind_default;
2085 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2086 kmp_internal_control_t new_icvs;
2087 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2088 new_icvs.next = NULL;
2089 if (nthreads_icv > 0) {
2090 new_icvs.nproc = nthreads_icv;
2092 if (proc_bind_icv != proc_bind_default) {
2093 new_icvs.proc_bind = proc_bind_icv;
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2124 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2125 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2126 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2127 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2128 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2130 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2133 KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
2135 if (!master_th->th.th_teams_microtask || level > teams_level) {
2136 int new_level = parent_team->t.t_level + 1;
2137 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2138 new_level = parent_team->t.t_active_level + 1;
2139 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2142 int new_level = parent_team->t.t_level;
2143 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2144 new_level = parent_team->t.t_active_level;
2145 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2147 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2149 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2151 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2152 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2155 propagateFPControl(team);
2157 if (ompd_state & OMPD_ENABLE_BP)
2158 ompd_bp_parallel_begin();
2161 if (__kmp_tasking_mode != tskm_immediate_exec) {
2164 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2165 parent_team->t.t_task_team[master_th->th.th_task_state]);
2166 KA_TRACE(20, (
"__kmp_fork_call: Primary T#%d pushing task_team %p / team "
2167 "%p, new task_team %p / team %p\n",
2168 __kmp_gtid_from_thread(master_th),
2169 master_th->th.th_task_team, parent_team,
2170 team->t.t_task_team[master_th->th.th_task_state], team));
2172 if (active_level || master_th->th.th_task_team) {
2174 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2175 if (master_th->th.th_task_state_top >=
2176 master_th->th.th_task_state_stack_sz) {
2177 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2178 kmp_uint8 *old_stack, *new_stack;
2180 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2181 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2182 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2184 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2188 old_stack = master_th->th.th_task_state_memo_stack;
2189 master_th->th.th_task_state_memo_stack = new_stack;
2190 master_th->th.th_task_state_stack_sz = new_size;
2191 __kmp_free(old_stack);
2195 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2196 master_th->th.th_task_state;
2197 master_th->th.th_task_state_top++;
2198#if KMP_NESTED_HOT_TEAMS
2199 if (master_th->th.th_hot_teams &&
2200 active_level < __kmp_hot_teams_max_level &&
2201 team == master_th->th.th_hot_teams[active_level].hot_team) {
2203 master_th->th.th_task_state =
2205 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2208 master_th->th.th_task_state = 0;
2209#if KMP_NESTED_HOT_TEAMS
2213#if !KMP_NESTED_HOT_TEAMS
2214 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2215 (team == root->r.r_hot_team));
  KA_TRACE(20,
           ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
            gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
            team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  argv = (void **)team->t.t_argv;

    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
2239 for (i = 0; i < argc; ++i) {
2241 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2246 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2247 if (!root->r.r_active)
2248 root->r.r_active = TRUE;
2250 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2251 __kmp_setup_icv_copy(team, nthreads,
2252 &master_th->th.th_current_task->td_icvs, loc);
2255 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2258 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2261 if (team->t.t_active_level == 1
2262 && !master_th->th.th_teams_microtask) {
2264 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2265 (__kmp_forkjoin_frames_mode == 3 ||
2266 __kmp_forkjoin_frames_mode == 1)) {
2267 kmp_uint64 tmp_time = 0;
2268 if (__itt_get_timestamp_ptr)
2269 tmp_time = __itt_get_timestamp();
2271 master_th->th.th_frame_time = tmp_time;
2272 if (__kmp_forkjoin_frames_mode == 3)
2273 team->t.t_region_time = tmp_time;
2277 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2278 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2280 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2286 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));
2293 if (__itt_stack_caller_create_ptr) {
2296 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2297 team->t.t_stack_id = __kmp_itt_stack_caller_create();
  } else if (parent_team->t.t_serialized) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
2328#if KMP_STATS_ENABLED
2332 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
    if (!team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
    }

#if KMP_STATS_ENABLED
    KMP_SET_THREAD_STATE(previous_state);
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2353 if (ompt_enabled.enabled) {
2354 master_th->th.ompt_thread_info.state = ompt_state_overhead;
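
/* Join path: __kmp_join_restore_state and __kmp_join_ompt emit the OMPT
   parallel-end transition, while __kmp_join_call tears down the region joined
   by the primary thread: it waits for workers in __kmp_internal_join, restores
   the primary thread's task state, and handles teams and ITT bookkeeping. */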
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
2373 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2374 if (ompt_enabled.ompt_callback_parallel_end) {
2375 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2376 parallel_data, &(task_info->task_data), flags, codeptr);
2379 task_info->frame.enter_frame = ompt_data_none;
2380 __kmp_join_restore_state(thread, team);
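// Join the current parallel region: wait for the team at the join barrier,
// report the region end to ITT/OMPT when enabled, release the team back to
// the pool, and restore the primary thread's team/task-team/ICV state to that
// of the parent team.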
void __kmp_join_call(ident_t *loc, int gtid,
                     enum fork_context_e fork_context, int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2401 master_th = __kmp_threads[gtid];
2402 root = master_th->th.th_root;
2403 team = master_th->th.th_team;
2404 parent_team = team->t.t_parent;
2406 master_th->th.th_ident = loc;
  void *team_microtask = (void *)team->t.t_pkfn;
2413 if (ompt_enabled.enabled &&
2414 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2415 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2420 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
2426 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2427 team->t.t_task_team[master_th->th.th_task_state]);
2431 if (team->t.t_serialized) {
2432 if (master_th->th.th_teams_microtask) {
2434 int level = team->t.t_level;
2435 int tlevel = master_th->th.th_teams_level;
2436 if (level == tlevel) {
      } else if (level == tlevel + 1) {
2444 team->t.t_serialized++;
2450 if (ompt_enabled.enabled) {
2451 if (fork_context == fork_context_gnu) {
2452 __ompt_lw_taskteam_unlink(master_th);
2454 __kmp_join_restore_state(master_th, parent_team);
2461 master_active = team->t.t_master_active;
2466 __kmp_internal_join(loc, gtid, team);
2468 if (__itt_stack_caller_create_ptr) {
2469 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2471 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2472 team->t.t_stack_id = NULL;
2476 master_th->th.th_task_state =
2479 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2480 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2484 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2485 parent_team->t.t_stack_id = NULL;
2493 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2494 void *codeptr = team->t.ompt_team_info.master_return_address;
2499 if (team->t.t_active_level == 1 &&
2500 (!master_th->th.th_teams_microtask ||
2501 master_th->th.th_teams_size.nteams == 1)) {
2502 master_th->th.th_ident = loc;
2505 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2506 __kmp_forkjoin_frames_mode == 3)
2507 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2508 master_th->th.th_frame_time, 0, loc,
2509 master_th->th.th_team_nproc, 1);
2510 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2511 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2512 __kmp_itt_region_joined(gtid);
2516#if KMP_AFFINITY_SUPPORTED
2519 master_th->th.th_first_place = team->t.t_first_place;
2520 master_th->th.th_last_place = team->t.t_last_place;
2524 if (master_th->th.th_teams_microtask && !exit_teams &&
2525 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2526 team->t.t_level == master_th->th.th_teams_level + 1) {
2531 ompt_data_t ompt_parallel_data = ompt_data_none;
2532 if (ompt_enabled.enabled) {
2533 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2534 if (ompt_enabled.ompt_callback_implicit_task) {
2535 int ompt_team_size = team->t.t_nproc;
2536 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2537 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2538 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2540 task_info->frame.exit_frame = ompt_data_none;
2541 task_info->task_data = ompt_data_none;
2542 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2543 __ompt_lw_taskteam_unlink(master_th);
2548 team->t.t_active_level--;
2549 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2555 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2556 int old_num = master_th->th.th_team_nproc;
2557 int new_num = master_th->th.th_teams_size.nth;
2558 kmp_info_t **other_threads = team->t.t_threads;
2559 team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      for (int i = old_num; i < new_num; ++i) {
2566 KMP_DEBUG_ASSERT(other_threads[i]);
2567 kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
2569 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2570 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2572 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2575 if (__kmp_tasking_mode != tskm_immediate_exec) {
2577 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2583 if (ompt_enabled.enabled) {
2584 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2585 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2593 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2594 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2596 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2601 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2603 if (!master_th->th.th_teams_microtask ||
2604 team->t.t_level > master_th->th.th_teams_level) {
2606 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2608 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2611 if (ompt_enabled.enabled) {
2612 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2613 if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
2617 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2618 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2619 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2620 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2622 task_info->frame.exit_frame = ompt_data_none;
2623 task_info->task_data = ompt_data_none;
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
2629 __kmp_pop_current_task_from_thread(master_th);
2631 master_th->th.th_def_allocator = team->t.t_def_allocator;
2634 if (ompd_state & OMPD_ENABLE_BP)
2635 ompd_bp_parallel_end();
2637 updateHWFPControl(team);
2639 if (root->r.r_active != master_active)
2640 root->r.r_active = master_active;
2642 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2650 master_th->th.th_team = parent_team;
2651 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2652 master_th->th.th_team_master = parent_team->t.t_threads[0];
2653 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2656 if (parent_team->t.t_serialized &&
2657 parent_team != master_th->th.th_serial_team &&
2658 parent_team != root->r.r_root_team) {
2659 __kmp_free_team(root,
2660 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2661 master_th->th.th_serial_team = parent_team;
2664 if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top > 0) {
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top;
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    } else if (team != root->r.r_hot_team) {
      master_th->th.th_task_state = 0;
2684 master_th->th.th_task_team =
2685 parent_team->t.t_task_team[master_th->th.th_task_state];
2687 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2688 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2695 master_th->th.th_current_task->td_flags.executing = 1;
2697 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2699#if KMP_AFFINITY_SUPPORTED
2700 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2701 __kmp_reset_root_init_mask(gtid);
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
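// Remember the current internal control variables on the team's control stack
// so they can be restored when the corresponding serialized nesting level
// ends; only needed when running inside a serialized (nested) team.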
2721void __kmp_save_internal_controls(kmp_info_t *thread) {
2723 if (thread->th.th_team != thread->th.th_serial_team) {
2726 if (thread->th.th_team->t.t_serialized > 1) {
2729 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2732 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2733 thread->th.th_team->t.t_serialized) {
2738 kmp_internal_control_t *control =
2739 (kmp_internal_control_t *)__kmp_allocate(
2740 sizeof(kmp_internal_control_t));
2742 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2744 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2746 control->next = thread->th.th_team->t.t_control_stack_top;
2747 thread->th.th_team->t.t_control_stack_top = control;
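// Change the nthreads-var ICV of the calling thread (typically reached from
// omp_set_num_threads()). If the runtime is idle, the root's hot team may be
// shrunk here and surplus workers returned to the thread pool.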
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2758 KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;
2766 thread = __kmp_threads[gtid];
2767 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2770 __kmp_save_internal_controls(thread);
2772 set__nproc(thread, new_nth);
2777 root = thread->th.th_root;
2778 if (__kmp_init_parallel && (!root->r.r_active) &&
2779 (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
2781 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2784 kmp_team_t *hot_team = root->r.r_hot_team;
2787 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2789 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2790 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2793 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2794 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2795 if (__kmp_tasking_mode != tskm_immediate_exec) {
2798 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2800 __kmp_free_thread(hot_team->t.t_threads[f]);
2801 hot_team->t.t_threads[f] = NULL;
2803 hot_team->t.t_nproc = new_nth;
2804#if KMP_NESTED_HOT_TEAMS
2805 if (thread->th.th_hot_teams) {
2806 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2807 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2811 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2812 hot_team->t.b->update_num_threads(new_nth);
2813 __kmp_add_threads_to_team(hot_team, new_nth);
2816 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2819 for (f = 0; f < new_nth; f++) {
2820 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2821 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2824 hot_team->t.t_size_changed = -1;
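// Set the max-active-levels ICV for the calling thread; negative values are
// ignored with a warning and values above KMP_MAX_ACTIVE_LEVELS_LIMIT are
// clamped to the limit.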
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
2835 KMP_DEBUG_ASSERT(__kmp_init_serial);
2838 if (max_active_levels < 0) {
2839 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // value is within the valid range; nothing to adjust
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }

  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));
2866 thread = __kmp_threads[gtid];
2868 __kmp_save_internal_controls(thread);
2870 set__max_active_levels(thread, max_active_levels);
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2878 KMP_DEBUG_ASSERT(__kmp_init_serial);
2880 thread = __kmp_threads[gtid];
2881 KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
2886 return thread->th.th_current_task->td_icvs.max_active_levels;
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_sched_t orig_kind;
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);
2922 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2923 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
2932 thread = __kmp_threads[gtid];
2934 __kmp_save_internal_controls(thread);
2936 if (kind < kmp_sched_upper_std) {
2937 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
2948 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2949 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2950 kmp_sched_lower - 2];
2952 __kmp_sched_apply_mods_intkind(
2953 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2954 if (kind == kmp_sched_auto || chunk < 1) {
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2968 KMP_DEBUG_ASSERT(__kmp_init_serial);
2970 thread = __kmp_threads[gtid];
2972 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2973 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2975 case kmp_sch_static_greedy:
2976 case kmp_sch_static_balanced:
2977 *kind = kmp_sched_static;
2978 __kmp_sched_apply_mods_stdkind(kind, th_type);
2981 case kmp_sch_static_chunked:
2982 *kind = kmp_sched_static;
2984 case kmp_sch_dynamic_chunked:
2985 *kind = kmp_sched_dynamic;
2988 case kmp_sch_guided_iterative_chunked:
2989 case kmp_sch_guided_analytical_chunked:
2990 *kind = kmp_sched_guided;
2993 *kind = kmp_sched_auto;
2995 case kmp_sch_trapezoidal:
2996 *kind = kmp_sched_trapezoidal;
2998#if KMP_STATIC_STEAL_ENABLED
2999 case kmp_sch_static_steal:
3000 *kind = kmp_sched_static_steal;
3004 KMP_FATAL(UnknownSchedulingType, th_type);
3007 __kmp_sched_apply_mods_stdkind(kind, th_type);
3008 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
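// Return the thread number of this thread's ancestor at the requested nesting
// level, walking up the team hierarchy and accounting for serialized teams
// and the teams construct.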
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3018 KMP_DEBUG_ASSERT(__kmp_init_serial);
3025 thr = __kmp_threads[gtid];
3026 team = thr->th.th_team;
3027 ii = team->t.t_level;
3031 if (thr->th.th_teams_microtask) {
3033 int tlevel = thr->th.th_teams_level;
3036 KMP_DEBUG_ASSERT(ii >= tlevel);
3048 return __kmp_tid_from_gtid(gtid);
3050 dd = team->t.t_serialized;
3052 while (ii > level) {
3053 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3055 if ((team->t.t_serialized) && (!dd)) {
3056 team = team->t.t_parent;
3060 team = team->t.t_parent;
3061 dd = team->t.t_serialized;
3066 return (dd > 1) ? (0) : (team->t.t_master_tid);
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3076 KMP_DEBUG_ASSERT(__kmp_init_serial);
3083 thr = __kmp_threads[gtid];
3084 team = thr->th.th_team;
3085 ii = team->t.t_level;
3089 if (thr->th.th_teams_microtask) {
3091 int tlevel = thr->th.th_teams_level;
3094 KMP_DEBUG_ASSERT(ii >= tlevel);
3105 while (ii > level) {
3106 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3108 if (team->t.t_serialized && (!dd)) {
3109 team = team->t.t_parent;
3113 team = team->t.t_parent;
3118 return team->t.t_nproc;
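// Build the default run-time schedule (kind and chunk) from the global
// __kmp_sched/__kmp_chunk settings; used when seeding ICVs for new roots and
// teams.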
3121kmp_r_sched_t __kmp_get_schedule_global() {
3126 kmp_r_sched_t r_sched;
3132 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3133 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3136 r_sched.r_sched_type = __kmp_static;
3139 r_sched.r_sched_type = __kmp_guided;
3141 r_sched.r_sched_type = __kmp_sched;
3143 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3145 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3147 r_sched.chunk = KMP_DEFAULT_CHUNK;
3149 r_sched.chunk = __kmp_chunk;
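// Make sure team->t.t_argv has room for argc entries: small argument lists
// reuse the inline array embedded in the team descriptor, larger ones are
// page-allocated.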
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3159 KMP_DEBUG_ASSERT(team);
3160 if (!realloc || argc > team->t.t_max_argc) {
    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3166 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);
3169 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3171 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
3175 team->t.t_argv = &team->t.t_inline_argv[0];
3176 if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
3185 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3186 ? KMP_MIN_MALLOC_ARGV_ENTRIES
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3193 if (__kmp_storage_map) {
3194 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3195 &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
3197 "team_%d.t_argv", team->t.t_id);
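// Allocate the per-team arrays (thread pointers, dispatch buffers, dispatch
// state and implicit task data) sized for max_nth threads.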
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;
3217 for (i = 0; i < num_disp_buff; ++i) {
3218 team->t.t_disp_buffer[i].buffer_index = i;
3219 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3223static void __kmp_free_team_arrays(kmp_team_t *team) {
3226 for (i = 0; i < team->t.t_max_nproc; ++i) {
3227 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3228 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3229 team->t.t_dispatch[i].th_disp_buffer = NULL;
3232#if KMP_USE_HIER_SCHED
3233 __kmp_dispatch_free_hierarchies(team);
3235 __kmp_free(team->t.t_threads);
3236 __kmp_free(team->t.t_disp_buffer);
3237 __kmp_free(team->t.t_dispatch);
3238 __kmp_free(team->t.t_implicit_task_taskdata);
3239 team->t.t_threads = NULL;
3240 team->t.t_disp_buffer = NULL;
3241 team->t.t_dispatch = NULL;
3242 team->t.t_implicit_task_taskdata = 0;
3245static void __kmp_reallocate_team_arrays(kmp_team_t *team,
int max_nth) {
3246 kmp_info_t **oldThreads = team->t.t_threads;
3248 __kmp_free(team->t.t_disp_buffer);
3249 __kmp_free(team->t.t_dispatch);
3250 __kmp_free(team->t.t_implicit_task_taskdata);
3251 __kmp_allocate_team_arrays(team, max_nth);
3253 KMP_MEMCPY(team->t.t_threads, oldThreads,
3254 team->t.t_nproc *
sizeof(kmp_info_t *));
3256 __kmp_free(oldThreads);
3259static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3261 kmp_r_sched_t r_sched =
3262 __kmp_get_schedule_global();
3264 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3266 kmp_internal_control_t g_icvs = {
3268 (kmp_int8)__kmp_global.g.g_dynamic,
3270 (kmp_int8)__kmp_env_blocktime,
3272 __kmp_dflt_blocktime,
3277 __kmp_dflt_team_nth,
3281 __kmp_dflt_max_active_levels,
3285 __kmp_nested_proc_bind.bind_types[0],
3286 __kmp_default_device,
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level = 0;
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;
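// Set up a new root: initialize its locks and flags, then create the
// (serialized) root team and the hot team that will be reused for this
// root's outermost parallel regions.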
3304static void __kmp_initialize_root(kmp_root_t *root) {
3306 kmp_team_t *root_team;
3307 kmp_team_t *hot_team;
3308 int hot_team_max_nth;
3309 kmp_r_sched_t r_sched =
3310 __kmp_get_schedule_global();
3311 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3312 KMP_DEBUG_ASSERT(root);
3313 KMP_ASSERT(!root->r.r_begin);
3316 __kmp_init_lock(&root->r.r_begin_lock);
3317 root->r.r_begin = FALSE;
3318 root->r.r_active = FALSE;
3319 root->r.r_in_parallel = 0;
3320 root->r.r_blocktime = __kmp_dflt_blocktime;
3321#if KMP_AFFINITY_SUPPORTED
3322 root->r.r_affinity_assigned = FALSE;
3327 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3330 __kmp_allocate_team(root,
3336 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3338 USE_NESTED_HOT_ARG(NULL)
3343 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3346 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3348 root->r.r_root_team = root_team;
3349 root_team->t.t_control_stack_top = NULL;
3352 root_team->t.t_threads[0] = NULL;
3353 root_team->t.t_nproc = 1;
3354 root_team->t.t_serialized = 1;
3356 root_team->t.t_sched.sched = r_sched.sched;
3359 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3360 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3364 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3367 __kmp_allocate_team(root,
3369 __kmp_dflt_team_nth_ub * 2,
3373 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3375 USE_NESTED_HOT_ARG(NULL)
3377 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3379 root->r.r_hot_team = hot_team;
3380 root_team->t.t_control_stack_top = NULL;
3383 hot_team->t.t_parent = root_team;
3386 hot_team_max_nth = hot_team->t.t_max_nproc;
3387 for (f = 0; f < hot_team_max_nth; ++f) {
3388 hot_team->t.t_threads[f] = NULL;
3390 hot_team->t.t_nproc = 1;
3392 hot_team->t.t_sched.sched = r_sched.sched;
3393 hot_team->t.t_size_changed = 0;
3398typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
3400 struct kmp_team_list_item *next;
3401} kmp_team_list_item_t;
3402typedef kmp_team_list_item_t *kmp_team_list_t;
static void __kmp_print_structure_team_accum(kmp_team_list_t list,
                                             kmp_team_p const *team) {
3416 KMP_DEBUG_ASSERT(list != NULL);
3421 __kmp_print_structure_team_accum(list, team->t.t_parent);
3422 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3426 while (l->next != NULL && l->entry != team) {
3429 if (l->next != NULL) {
3435 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3441 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3442 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
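// Debug dump of the runtime state: the global thread table, per-thread
// details, the uber (root) threads, every team reachable from them, and the
// thread/team pools.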
void __kmp_print_structure(void) {
  int gtid;
  kmp_team_list_t list;

  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));
  list->entry = NULL;
  list->next = NULL;

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
3484 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3485 __kmp_printf(
"%2d", gtid);
3486 if (__kmp_threads != NULL) {
3487 __kmp_printf(
" %p", __kmp_threads[gtid]);
3489 if (__kmp_root != NULL) {
3490 __kmp_printf(
" %p", __kmp_root[gtid]);
3497 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3499 if (__kmp_threads != NULL) {
3501 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3502 kmp_info_t
const *thread = __kmp_threads[gtid];
3503 if (thread != NULL) {
3504 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3505 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3506 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3507 __kmp_print_structure_team(
" Serial Team: ",
3508 thread->th.th_serial_team);
3509 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3510 __kmp_print_structure_thread(
" Primary: ",
3511 thread->th.th_team_master);
3512 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3513 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3514 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3515 __kmp_print_structure_thread(
" Next in pool: ",
3516 thread->th.th_next_pool);
3518 __kmp_print_structure_team_accum(list, thread->th.th_team);
3519 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3523 __kmp_printf(
"Threads array is not allocated.\n");
3527 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3529 if (__kmp_root != NULL) {
3531 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3532 kmp_root_t
const *root = __kmp_root[gtid];
3534 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3535 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3536 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3537 __kmp_print_structure_thread(
" Uber Thread: ",
3538 root->r.r_uber_thread);
3539 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3540 __kmp_printf(
" In Parallel: %2d\n",
3541 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3543 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3544 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3548 __kmp_printf(
"Ubers array is not allocated.\n");
3551 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3553 while (list->next != NULL) {
3554 kmp_team_p
const *team = list->entry;
3556 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3557 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3558 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3559 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3560 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3561 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3562 for (i = 0; i < team->t.t_nproc; ++i) {
3563 __kmp_printf(
" Thread %2d: ", i);
3564 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3566 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3572 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3574 __kmp_print_structure_thread(
"Thread pool: ",
3575 CCAST(kmp_info_t *, __kmp_thread_pool));
3576 __kmp_print_structure_team(
"Team pool: ",
3577 CCAST(kmp_team_t *, __kmp_team_pool));
3581 while (list != NULL) {
3582 kmp_team_list_item_t *item = list;
3584 KMP_INTERNAL_FREE(item);
3593static const unsigned __kmp_primes[] = {
3594 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3595 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3596 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3597 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3598 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3599 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3600 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3601 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3602 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3603 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3604 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
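// Thread-private pseudo-random number generator: each thread runs a linear
// congruential recurrence x_{n+1} = a * x_n + 1 (mod 2^32), with the
// multiplier 'a' picked from __kmp_primes, and the high 16 bits of the state
// are handed out. A rough sketch of one step (assuming 32-bit unsigned
// wrap-around):
//   unsigned short r = (unsigned short)(x >> 16); // value returned
//   x = x * a + 1;                                // advance the state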
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // only reclaim roots that died while
                                      // not active
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
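// Grow the __kmp_threads/__kmp_root arrays so that at least nNeed additional
// slots are available, roughly doubling the capacity up to __kmp_sys_max_nth.
// The old arrays are kept on __kmp_old_threads_list because other threads may
// still be dereferencing them.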
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  added = __kmp_reclaim_dead_roots();
3710 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3713 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3717 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
3734 kmp_old_threads_list_t *node =
3735 (kmp_old_threads_list_t *)__kmp_allocate(
sizeof(kmp_old_threads_list_t));
3736 node->threads = __kmp_threads;
3737 node->next = __kmp_old_threads_list;
3738 __kmp_old_threads_list = node;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;
3745 if (newCapacity > __kmp_tp_capacity) {
3746 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3747 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3748 __kmp_threadprivate_resize_cache(newCapacity);
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
3752 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
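// Register the calling native thread as an OpenMP root: find a free gtid slot
// (expanding the thread array if needed), set up the kmp_root_t, its uber
// thread and serial team, and publish the thread in __kmp_threads. Returns
// the gtid assigned to the new root.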
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
3784 capacity = __kmp_threads_capacity;
3785 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3792 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3793 capacity -= __kmp_hidden_helper_threads_num;
3797 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3798 if (__kmp_tp_cached) {
3799 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3800 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3801 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3803 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3813 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3816 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3817 gtid <= __kmp_hidden_helper_threads_num;
3820 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3821 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3822 "hidden helper thread: T#%d\n",
3828 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
    for (gtid = __kmp_hidden_helper_threads_num + 1;
         TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
      ;
    KA_TRACE(
        1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3837 KMP_ASSERT(gtid < __kmp_threads_capacity);
3842 TCW_4(__kmp_nth, __kmp_nth + 1);
3846 if (__kmp_adjust_gtid_mode) {
3847 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3848 if (TCR_4(__kmp_gtid_mode) != 2) {
3849 TCW_4(__kmp_gtid_mode, 2);
3852 if (TCR_4(__kmp_gtid_mode) != 1) {
3853 TCW_4(__kmp_gtid_mode, 1);
3858#ifdef KMP_ADJUST_BLOCKTIME
3861 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3862 if (__kmp_nth > __kmp_avail_proc) {
3863 __kmp_zero_bt = TRUE;
3869 if (!(root = __kmp_root[gtid])) {
3870 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3871 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3874#if KMP_STATS_ENABLED
3876 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3877 __kmp_stats_thread_ptr->startLife();
3878 KMP_SET_THREAD_STATE(SERIAL_REGION);
3881 __kmp_initialize_root(root);
3884 if (root->r.r_uber_thread) {
3885 root_thread = root->r.r_uber_thread;
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3888 if (__kmp_storage_map) {
3889 __kmp_print_thread_storage_map(root_thread, gtid);
3891 root_thread->th.th_info.ds.ds_gtid = gtid;
3893 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3895 root_thread->th.th_root = root;
3896 if (__kmp_env_consistency_check) {
3897 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3900 __kmp_initialize_fast_memory(root_thread);
3904 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3905 __kmp_initialize_bget(root_thread);
3907 __kmp_init_random(root_thread);
3911 if (!root_thread->th.th_serial_team) {
3912 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3913 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3914 root_thread->th.th_serial_team = __kmp_allocate_team(
3919 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3921 KMP_ASSERT(root_thread->th.th_serial_team);
3922 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3923 root_thread->th.th_serial_team));
3926 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3928 root->r.r_root_team->t.t_threads[0] = root_thread;
3929 root->r.r_hot_team->t.t_threads[0] = root_thread;
3930 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3932 root_thread->th.th_serial_team->t.t_serialized = 0;
3933 root->r.r_uber_thread = root_thread;
3936 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3937 TCW_4(__kmp_init_gtid, TRUE);
3940 __kmp_gtid_set_specific(gtid);
3943 __kmp_itt_thread_name(gtid);
3946#ifdef KMP_TDATA_GTID
3949 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3950 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3952 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3954 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3955 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3956 KMP_INIT_BARRIER_STATE));
3959 for (b = 0; b < bs_last_barrier; ++b) {
3960 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3962 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3966 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3967 KMP_INIT_BARRIER_STATE);
3969#if KMP_AFFINITY_SUPPORTED
3970 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3971 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3972 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3973 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3975 root_thread->th.th_def_allocator = __kmp_def_allocator;
3976 root_thread->th.th_prev_level = 0;
3977 root_thread->th.th_prev_num_threads = 1;
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3980 tmp->cg_root = root_thread;
3981 tmp->cg_thread_limit = __kmp_cg_max_nth;
3982 tmp->cg_nthreads = 1;
  KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
                 " cg_nthreads init to 1\n",
                 root_thread, tmp));
3987 root_thread->th.th_cg_roots = tmp;
3989 __kmp_root_counter++;
3992 if (!initial_thread && ompt_enabled.enabled) {
3994 kmp_info_t *root_thread = ompt_get_thread();
3996 ompt_set_thread_state(root_thread, ompt_state_overhead);
3998 if (ompt_enabled.ompt_callback_thread_begin) {
3999 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4000 ompt_thread_initial, __ompt_get_thread_data_internal());
4002 ompt_data_t *task_data;
4003 ompt_data_t *parallel_data;
4004 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4006 if (ompt_enabled.ompt_callback_implicit_task) {
4007 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4008 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4011 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4015 if (ompd_state & OMPD_ENABLE_BP)
4016 ompd_bp_thread_begin();
4020 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4025#if KMP_NESTED_HOT_TEAMS
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4030 if (!hot_teams || !hot_teams[level].hot_team) {
4033 KMP_DEBUG_ASSERT(level < max_level);
4034 kmp_team_t *team = hot_teams[level].hot_team;
4035 nth = hot_teams[level].hot_team_nth;
4037 if (level < max_level - 1) {
4038 for (i = 0; i < nth; ++i) {
4039 kmp_info_t *th = team->t.t_threads[i];
4040 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4041 if (i > 0 && th->th.th_hot_teams) {
4042 __kmp_free(th->th.th_hot_teams);
4043 th->th.th_hot_teams = NULL;
4047 __kmp_free_team(root, team, NULL);
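// Tear down a root that is no longer active: free its root team and hot team
// (including any nested hot teams), fire OMPD/OMPT end callbacks, and reap
// the uber thread's bookkeeping. Returns a count of the threads released.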
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4055 kmp_team_t *root_team = root->r.r_root_team;
4056 kmp_team_t *hot_team = root->r.r_hot_team;
4057 int n = hot_team->t.t_nproc;
4060 KMP_DEBUG_ASSERT(!root->r.r_active);
4062 root->r.r_root_team = NULL;
4063 root->r.r_hot_team = NULL;
4066 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4067#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level > 0) { // free nested hot teams, if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
4071 kmp_info_t *th = hot_team->t.t_threads[i];
4072 if (__kmp_hot_teams_max_level > 1) {
4073 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4075 if (th->th.th_hot_teams) {
4076 __kmp_free(th->th.th_hot_teams);
4077 th->th.th_hot_teams = NULL;
4082 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4087 if (__kmp_tasking_mode != tskm_immediate_exec) {
4088 __kmp_wait_to_unref_task_teams();
4094 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4096 (LPVOID) & (root->r.r_uber_thread->th),
4097 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4098 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4102 if (ompd_state & OMPD_ENABLE_BP)
4103 ompd_bp_thread_end();
4107 ompt_data_t *task_data;
4108 ompt_data_t *parallel_data;
4109 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4111 if (ompt_enabled.ompt_callback_implicit_task) {
4112 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4113 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4115 if (ompt_enabled.ompt_callback_thread_end) {
4116 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4117 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4123 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4124 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4126 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4127 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4130 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4131 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4132 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4133 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4134 root->r.r_uber_thread->th.th_cg_roots = NULL;
4136 __kmp_reap_thread(root->r.r_uber_thread, 1);
4140 root->r.r_uber_thread = NULL;
4142 root->r.r_begin = FALSE;
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4152 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4153 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4154 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4157 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4160 kmp_root_t *root = __kmp_root[gtid];
4162 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4163 KMP_ASSERT(KMP_UBER_GTID(gtid));
4164 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4165 KMP_ASSERT(root->r.r_active == FALSE);
4169 kmp_info_t *thread = __kmp_threads[gtid];
4170 kmp_team_t *team = thread->th.th_team;
4171 kmp_task_team_t *task_team = thread->th.th_task_team;
4174 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4175 task_team->tt.tt_hidden_helper_task_encountered)) {
4178 thread->th.ompt_thread_info.state = ompt_state_undefined;
4180 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4183 __kmp_reset_root(gtid, root);
  KA_TRACE(1,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4189 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4201 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4202 KMP_ASSERT(KMP_UBER_GTID(gtid));
4203 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4204 KMP_ASSERT(root->r.r_active == FALSE);
4206 r = __kmp_reset_root(gtid, root);
  KA_TRACE(1,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
4214void __kmp_task_info() {
4216 kmp_int32 gtid = __kmp_entry_gtid();
4217 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4218 kmp_info_t *this_thr = __kmp_threads[gtid];
4219 kmp_team_t *steam = this_thr->th.th_serial_team;
4220 kmp_team_t *team = this_thr->th.th_team;
  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
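// Bind a kmp_info_t to a slot in a team: set its tid/team/root pointers,
// implicit task, dispatch buffer and contention-group root so the thread can
// run as member 'tid' of 'team'.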
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
4238 KMP_DEBUG_ASSERT(this_thr != NULL);
4239 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4240 KMP_DEBUG_ASSERT(team);
4241 KMP_DEBUG_ASSERT(team->t.t_threads);
4242 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4243 kmp_info_t *master = team->t.t_threads[0];
4244 KMP_DEBUG_ASSERT(master);
4245 KMP_DEBUG_ASSERT(master->th.th_root);
4249 TCW_SYNC_PTR(this_thr->th.th_team, team);
4251 this_thr->th.th_info.ds.ds_tid = tid;
4252 this_thr->th.th_set_nproc = 0;
4253 if (__kmp_tasking_mode != tskm_immediate_exec)
4256 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4258 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4259 this_thr->th.th_set_proc_bind = proc_bind_default;
4260#if KMP_AFFINITY_SUPPORTED
4261 this_thr->th.th_new_place = this_thr->th.th_current_place;
4263 this_thr->th.th_root = master->th.th_root;
4266 this_thr->th.th_team_nproc = team->t.t_nproc;
4267 this_thr->th.th_team_master = master;
4268 this_thr->th.th_team_serialized = team->t.t_serialized;
4270 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));
4275 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));
4284 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4286 this_thr->th.th_local.this_construct = 0;
4288 if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4291 if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4296 this_thr->th.th_pri_head = NULL;
4299 if (this_thr != master &&
4300 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4302 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4303 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4306 int i = tmp->cg_nthreads--;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
                   " on node %p of thread %p to %d\n",
                   this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4314 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4316 this_thr->th.th_cg_roots->cg_nthreads++;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
                   " node %p of thread %p to %d\n",
                   this_thr, this_thr->th.th_cg_roots,
                   this_thr->th.th_cg_roots->cg_root,
                   this_thr->th.th_cg_roots->cg_nthreads));
4322 this_thr->th.th_current_task->td_icvs.thread_limit =
4323 this_thr->th.th_cg_roots->cg_thread_limit;
    volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
    KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                  team->t.t_max_nproc));
4335 KMP_ASSERT(dispatch);
4336 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4337 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4339 dispatch->th_disp_index = 0;
4340 dispatch->th_doacross_buf_idx = 0;
4341 if (!dispatch->th_disp_buffer) {
4342 dispatch->th_disp_buffer =
4343 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4345 if (__kmp_storage_map) {
4346 __kmp_print_storage_map_gtid(
4347 gtid, &dispatch->th_disp_buffer[0],
4348 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4350 : __kmp_dispatch_num_buffers],
4352 "th_%d.th_dispatch.th_disp_buffer "
4353 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4354 gtid, team->t.t_id, gtid);
    memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4360 dispatch->th_dispatch_pr_current = 0;
4361 dispatch->th_dispatch_sh_current = 0;
4363 dispatch->th_deo_fcn = 0;
4364 dispatch->th_dxo_fcn = 0;
4367 this_thr->th.th_next_pool = NULL;
4369 if (!this_thr->th.th_task_state_memo_stack) {
4371 this_thr->th.th_task_state_memo_stack =
4372 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4373 this_thr->th.th_task_state_top = 0;
4374 this_thr->th.th_task_state_stack_sz = 4;
    for (i = 0; i < this_thr->th.th_task_state_stack_sz; ++i)
      this_thr->th.th_task_state_memo_stack[i] = 0;
4380 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4381 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
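// Hand out a worker thread for 'team': reuse one from __kmp_thread_pool when
// available, otherwise allocate a fresh kmp_info_t, pick a new gtid, create
// its serial team and start the underlying OS worker thread.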
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4398 KMP_DEBUG_ASSERT(root && team);
4399#if !KMP_NESTED_HOT_TEAMS
4400 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4405 if (__kmp_thread_pool) {
4406 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4408 if (new_thr == __kmp_thread_pool_insert_pt) {
4409 __kmp_thread_pool_insert_pt = NULL;
4411 TCW_4(new_thr->th.th_in_pool, FALSE);
4412 __kmp_suspend_initialize_thread(new_thr);
4413 __kmp_lock_suspend_mx(new_thr);
4414 if (new_thr->th.th_active_in_pool == TRUE) {
4415 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4416 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4417 new_thr->th.th_active_in_pool = FALSE;
4419 __kmp_unlock_suspend_mx(new_thr);
    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4423 KMP_ASSERT(!new_thr->th.th_team);
4424 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4427 __kmp_initialize_info(new_thr, team, new_tid,
4428 new_thr->th.th_info.ds.ds_gtid);
4429 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4431 TCW_4(__kmp_nth, __kmp_nth + 1);
4433 new_thr->th.th_task_state = 0;
4434 new_thr->th.th_task_state_top = 0;
4435 new_thr->th.th_task_state_stack_sz = 4;
4437 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4439 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4443#ifdef KMP_ADJUST_BLOCKTIME
4446 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4447 if (__kmp_nth > __kmp_avail_proc) {
4448 __kmp_zero_bt = TRUE;
4457 kmp_balign_t *balign = new_thr->th.th_bar;
4458 for (b = 0; b < bs_last_barrier; ++b)
4459 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4470 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4471 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4476 if (!TCR_4(__kmp_init_monitor)) {
4477 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4478 if (!TCR_4(__kmp_init_monitor)) {
4479 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4480 TCW_4(__kmp_init_monitor, 1);
4481 __kmp_create_monitor(&__kmp_monitor);
4482 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4493 while (TCR_4(__kmp_init_monitor) < 2) {
4496 KF_TRACE(10, (
"after monitor thread has started\n"));
4499 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
                           ? 1
                           : __kmp_hidden_helper_threads_num + 1;
4510 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4512 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4515 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4516 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4521 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4523 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4525#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4528 __itt_suppress_mark_range(
4529 __itt_suppress_range, __itt_suppress_threading_errors,
4530 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4531 __itt_suppress_mark_range(
4532 __itt_suppress_range, __itt_suppress_threading_errors,
4533 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4535 __itt_suppress_mark_range(
4536 __itt_suppress_range, __itt_suppress_threading_errors,
4537 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4539 __itt_suppress_mark_range(__itt_suppress_range,
4540 __itt_suppress_threading_errors,
4541 &new_thr->th.th_suspend_init_count,
4542 sizeof(new_thr->th.th_suspend_init_count));
4545 __itt_suppress_mark_range(__itt_suppress_range,
4546 __itt_suppress_threading_errors,
4547 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4548 sizeof(new_thr->th.th_bar[0].bb.b_go));
4549 __itt_suppress_mark_range(__itt_suppress_range,
4550 __itt_suppress_threading_errors,
4551 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4552 sizeof(new_thr->th.th_bar[1].bb.b_go));
4553 __itt_suppress_mark_range(__itt_suppress_range,
4554 __itt_suppress_threading_errors,
4555 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4556 sizeof(new_thr->th.th_bar[2].bb.b_go));
4558 if (__kmp_storage_map) {
4559 __kmp_print_thread_storage_map(new_thr, new_gtid);
4564 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4565 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4566 new_thr->th.th_serial_team = serial_team =
4567 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4571 proc_bind_default, &r_icvs,
4572 0 USE_NESTED_HOT_ARG(NULL));
4574 KMP_ASSERT(serial_team);
4575 serial_team->t.t_serialized = 0;
4577 serial_team->t.t_threads[0] = new_thr;
4579 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4583 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4586 __kmp_initialize_fast_memory(new_thr);
4590 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4591 __kmp_initialize_bget(new_thr);
4594 __kmp_init_random(new_thr);
4598 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4599 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4602 kmp_balign_t *balign = new_thr->th.th_bar;
4603 for (b = 0; b < bs_last_barrier; ++b) {
4604 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4605 balign[b].bb.team = NULL;
4606 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4607 balign[b].bb.use_oncore_barrier = 0;
4610 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4611 new_thr->th.th_sleep_loc_type = flag_unset;
4613 new_thr->th.th_spin_here = FALSE;
4614 new_thr->th.th_next_waiting = 0;
4616 new_thr->th.th_blocking =
false;
4619#if KMP_AFFINITY_SUPPORTED
4620 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4621 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4622 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4623 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4625 new_thr->th.th_def_allocator = __kmp_def_allocator;
4626 new_thr->th.th_prev_level = 0;
4627 new_thr->th.th_prev_num_threads = 1;
4629 TCW_4(new_thr->th.th_in_pool, FALSE);
4630 new_thr->th.th_active_in_pool = FALSE;
4631 TCW_4(new_thr->th.th_active, TRUE);
4639 if (__kmp_adjust_gtid_mode) {
4640 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4641 if (TCR_4(__kmp_gtid_mode) != 2) {
4642 TCW_4(__kmp_gtid_mode, 2);
4645 if (TCR_4(__kmp_gtid_mode) != 1) {
4646 TCW_4(__kmp_gtid_mode, 1);
4651#ifdef KMP_ADJUST_BLOCKTIME
4654 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4655 if (__kmp_nth > __kmp_avail_proc) {
4656 __kmp_zero_bt = TRUE;
4663 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4664 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4666 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4668 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4679static void __kmp_reinitialize_team(kmp_team_t *team,
4680 kmp_internal_control_t *new_icvs,
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
4684 KMP_DEBUG_ASSERT(team && new_icvs);
4685 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4686 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4688 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4690 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4691 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4706 KMP_DEBUG_ASSERT(team);
4707 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4708 KMP_DEBUG_ASSERT(team->t.t_threads);
4711 team->t.t_master_tid = 0;
4713 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4714 team->t.t_nproc = new_nproc;
4717 team->t.t_next_pool = NULL;
4721 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4722 team->t.t_invoke = NULL;
4725 team->t.t_sched.sched = new_icvs->sched.sched;
4727#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4728 team->t.t_fp_control_saved = FALSE;
4729 team->t.t_x87_fpu_control_word = 0;
4730 team->t.t_mxcsr = 0;
4733 team->t.t_construct = 0;
4735 team->t.t_ordered.dt.t_value = 0;
4736 team->t.t_master_active = FALSE;
4739 team->t.t_copypriv_data = NULL;
4742 team->t.t_copyin_counter = 0;
4745 team->t.t_control_stack_top = NULL;
4747 __kmp_reinitialize_team(team, new_icvs, loc);
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
4753#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4757 if (KMP_AFFINITY_CAPABLE()) {
4759 if (old_mask != NULL) {
4760 status = __kmp_get_system_affinity(old_mask, TRUE);
4763 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error), __kmp_msg_null);
4767 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4772#if KMP_AFFINITY_SUPPORTED
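// Split the primary thread's place partition among the team's workers
// according to the team's proc_bind policy (primary, close, or spread).
// With update_master_only set, only the primary thread's partition is
// recomputed.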
4778static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4780 if (KMP_HIDDEN_HELPER_TEAM(team))
4783 kmp_info_t *master_th = team->t.t_threads[0];
4784 KMP_DEBUG_ASSERT(master_th != NULL);
4785 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4786 int first_place = master_th->th.th_first_place;
4787 int last_place = master_th->th.th_last_place;
4788 int masters_place = master_th->th.th_current_place;
4789 int num_masks = __kmp_affinity.num_masks;
4790 team->t.t_first_place = first_place;
4791 team->t.t_last_place = last_place;
4793 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4794 "bound to place %d partition = [%d,%d]\n",
4795 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4796 team->t.t_id, masters_place, first_place, last_place));
4798 switch (proc_bind) {
4800 case proc_bind_default:
4803 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4806 case proc_bind_primary: {
4808 int n_th = team->t.t_nproc;
4809 for (f = 1; f < n_th; f++) {
4810 kmp_info_t *th = team->t.t_threads[f];
4811 KMP_DEBUG_ASSERT(th != NULL);
4812 th->th.th_first_place = first_place;
4813 th->th.th_last_place = last_place;
4814 th->th.th_new_place = masters_place;
4815 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4816 team->t.t_display_affinity != 1) {
4817 team->t.t_display_affinity = 1;
4820 KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4821 "partition = [%d,%d]\n",
4822 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4823 f, masters_place, first_place, last_place));
4827 case proc_bind_close: {
4829 int n_th = team->t.t_nproc;
4831 if (first_place <= last_place) {
4832 n_places = last_place - first_place + 1;
4834 n_places = num_masks - first_place + last_place + 1;
4836 if (n_th <= n_places) {
4837 int place = masters_place;
4838 for (f = 1; f < n_th; f++) {
4839 kmp_info_t *th = team->t.t_threads[f];
4840 KMP_DEBUG_ASSERT(th != NULL);
4842 if (place == last_place) {
4843 place = first_place;
4844 } else if (place == (num_masks - 1)) {
4849 th->th.th_first_place = first_place;
4850 th->th.th_last_place = last_place;
4851 th->th.th_new_place = place;
4852 if (__kmp_display_affinity && place != th->th.th_current_place &&
4853 team->t.t_display_affinity != 1) {
4854 team->t.t_display_affinity = 1;
4857 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4858 "partition = [%d,%d]\n",
4859 __kmp_gtid_from_thread(team->t.t_threads[f]),
4860 team->t.t_id, f, place, first_place, last_place));
4863 int S, rem, gap, s_count;
4864 S = n_th / n_places;
4866 rem = n_th - (S * n_places);
4867 gap = rem > 0 ? n_places / rem : n_places;
4868 int place = masters_place;
4870 for (f = 0; f < n_th; f++) {
4871 kmp_info_t *th = team->t.t_threads[f];
4872 KMP_DEBUG_ASSERT(th != NULL);
4874 th->th.th_first_place = first_place;
4875 th->th.th_last_place = last_place;
4876 th->th.th_new_place = place;
4877 if (__kmp_display_affinity && place != th->th.th_current_place &&
4878 team->t.t_display_affinity != 1) {
4879 team->t.t_display_affinity = 1;
4883 if ((s_count == S) && rem && (gap_ct == gap)) {
4885 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4887 if (place == last_place) {
4888 place = first_place;
4889 } else if (place == (num_masks - 1)) {
4897 } else if (s_count == S) {
4898 if (place == last_place) {
4899 place = first_place;
4900 } else if (place == (num_masks - 1)) {
4910 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4911 "partition = [%d,%d]\n",
4912 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4913 th->th.th_new_place, first_place, last_place));
4915 KMP_DEBUG_ASSERT(place == masters_place);
4919 case proc_bind_spread: {
4921 int n_th = team->t.t_nproc;
4924 if (first_place <= last_place) {
4925 n_places = last_place - first_place + 1;
4927 n_places = num_masks - first_place + last_place + 1;
4929 if (n_th <= n_places) {
4932 if (n_places != num_masks) {
4933 int S = n_places / n_th;
4934 int s_count, rem, gap, gap_ct;
4936 place = masters_place;
4937 rem = n_places - n_th * S;
4938 gap = rem ? n_th / rem : 1;
4941 if (update_master_only == 1)
4943 for (f = 0; f < thidx; f++) {
4944 kmp_info_t *th = team->t.t_threads[f];
4945 KMP_DEBUG_ASSERT(th != NULL);
4947 th->th.th_first_place = place;
4948 th->th.th_new_place = place;
4949 if (__kmp_display_affinity && place != th->th.th_current_place &&
4950 team->t.t_display_affinity != 1) {
4951 team->t.t_display_affinity = 1;
4954 while (s_count < S) {
4955 if (place == last_place) {
4956 place = first_place;
4957 } else if (place == (num_masks - 1)) {
4964 if (rem && (gap_ct == gap)) {
4965 if (place == last_place) {
4966 place = first_place;
4967 } else if (place == (num_masks - 1)) {
4975 th->th.th_last_place = place;
4978 if (place == last_place) {
4979 place = first_place;
4980 } else if (place == (num_masks - 1)) {
4987 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4988 "partition = [%d,%d], num_masks: %u\n",
4989 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4990 f, th->th.th_new_place, th->th.th_first_place,
4991 th->th.th_last_place, num_masks));
4997 double current = static_cast<double>(masters_place);
4999 double spacing = (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
5004 if (update_master_only == 1)
5006 for (f = 0; f < thidx; f++) {
5007 first = static_cast<int>(current);
5008 last = static_cast<int>(current + spacing) - 1;
5009 KMP_DEBUG_ASSERT(last >= first);
5010 if (first >= n_places) {
5011 if (masters_place) {
5014 if (first == (masters_place + 1)) {
5015 KMP_DEBUG_ASSERT(f == n_th);
5018 if (last == masters_place) {
5019 KMP_DEBUG_ASSERT(f == (n_th - 1));
5023 KMP_DEBUG_ASSERT(f == n_th);
5028 if (last >= n_places) {
5029 last = (n_places - 1);
5034 KMP_DEBUG_ASSERT(0 <= first);
5035 KMP_DEBUG_ASSERT(n_places > first);
5036 KMP_DEBUG_ASSERT(0 <= last);
5037 KMP_DEBUG_ASSERT(n_places > last);
5038 KMP_DEBUG_ASSERT(last_place >= first_place);
5039 th = team->t.t_threads[f];
5040 KMP_DEBUG_ASSERT(th);
5041 th->th.th_first_place = first;
5042 th->th.th_new_place = place;
5043 th->th.th_last_place = last;
5044 if (__kmp_display_affinity && place != th->th.th_current_place &&
5045 team->t.t_display_affinity != 1) {
5046 team->t.t_display_affinity = 1;
5049 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5050 "partition = [%d,%d], spacing = %.4f\n",
5051 __kmp_gtid_from_thread(team->t.t_threads[f]),
5052 team->t.t_id, f, th->th.th_new_place,
5053 th->th.th_first_place, th->th.th_last_place, spacing));
5057 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5059 int S, rem, gap, s_count;
5060 S = n_th / n_places;
5062 rem = n_th - (S * n_places);
5063 gap = rem > 0 ? n_places / rem : n_places;
5064 int place = masters_place;
5067 if (update_master_only == 1)
5069 for (f = 0; f < thidx; f++) {
5070 kmp_info_t *th = team->t.t_threads[f];
5071 KMP_DEBUG_ASSERT(th != NULL);
5073 th->th.th_first_place = place;
5074 th->th.th_last_place = place;
5075 th->th.th_new_place = place;
5076 if (__kmp_display_affinity && place != th->th.th_current_place &&
5077 team->t.t_display_affinity != 1) {
5078 team->t.t_display_affinity = 1;
5082 if ((s_count == S) && rem && (gap_ct == gap)) {
5084 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5086 if (place == last_place) {
5087 place = first_place;
5088 } else if (place == (num_masks - 1)) {
5096 } else if (s_count == S) {
5097 if (place == last_place) {
5098 place = first_place;
5099 } else if (place == (num_masks - 1)) {
5108 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5109 "partition = [%d,%d]\n",
5110 __kmp_gtid_from_thread(team->t.t_threads[f]),
5111 team->t.t_id, f, th->th.th_new_place,
5112 th->th.th_first_place, th->th.th_last_place));
5114 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5122 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
5130kmp_team_t *__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
5132 ompt_data_t ompt_parallel_data,
5134 kmp_proc_bind_t new_proc_bind,
5135 kmp_internal_control_t *new_icvs,
5136 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5137 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5140 int use_hot_team = !root->r.r_active;
5142 int do_place_partition = 1;
5144 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
5145 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5146 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5149#if KMP_NESTED_HOT_TEAMS
5150 kmp_hot_team_ptr_t *hot_teams;
5152 team = master->th.th_team;
5153 level = team->t.t_active_level;
5154 if (master->th.th_teams_microtask) {
5155 if (master->th.th_teams_size.nteams > 1 &&
5158 (microtask_t)__kmp_teams_master ||
5159 master->th.th_teams_level <
5166 if ((master->th.th_teams_size.nteams == 1 &&
5167 master->th.th_teams_level >= team->t.t_level) ||
5168 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5169 do_place_partition = 0;
5171 hot_teams = master->th.th_hot_teams;
5172 if (level < __kmp_hot_teams_max_level && hot_teams &&
5173 hot_teams[level].hot_team) {
5181 KMP_DEBUG_ASSERT(new_nproc == 1);
5185 if (use_hot_team && new_nproc > 1) {
5186 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5187#if KMP_NESTED_HOT_TEAMS
5188 team = hot_teams[level].hot_team;
5190 team = root->r.r_hot_team;
5193 if (__kmp_tasking_mode != tskm_immediate_exec) {
5194 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5195 "task_team[1] = %p before reinit\n",
5196 team->t.t_task_team[0], team->t.t_task_team[1]));
5200 if (team->t.t_nproc != new_nproc &&
5201 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5203 int old_nthr = team->t.t_nproc;
5204 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5209 if (do_place_partition == 0)
5210 team->t.t_proc_bind = proc_bind_default;
5214 if (team->t.t_nproc == new_nproc) {
5215 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
5218 if (team->t.t_size_changed == -1) {
5219 team->t.t_size_changed = 1;
5221 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5225 kmp_r_sched_t new_sched = new_icvs->sched;
5227 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5229 __kmp_reinitialize_team(team, new_icvs,
5230 root->r.r_uber_thread->th.th_ident);
5232 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5233 team->t.t_threads[0], team));
5234 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5236#if KMP_AFFINITY_SUPPORTED
5237 if ((team->t.t_size_changed == 0) &&
5238 (team->t.t_proc_bind == new_proc_bind)) {
5239 if (new_proc_bind == proc_bind_spread) {
5240 if (do_place_partition) {
5242 __kmp_partition_places(team, 1);
5245 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5246 "proc_bind = %d, partition = [%d,%d]\n",
5247 team->t.t_id, new_proc_bind, team->t.t_first_place,
5248 team->t.t_last_place));
5250 if (do_place_partition) {
5251 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5252 __kmp_partition_places(team);
5256 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5258 } else if (team->t.t_nproc > new_nproc) {
5260 KA_TRACE(20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc));
5263 team->t.t_size_changed = 1;
5264 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5267 __kmp_add_threads_to_team(team, new_nproc);
5269#if KMP_NESTED_HOT_TEAMS
5270 if (__kmp_hot_teams_mode == 0) {
5273 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5274 hot_teams[level].hot_team_nth = new_nproc;
5277 for (f = new_nproc; f < team->t.t_nproc; f++) {
5278 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5279 if (__kmp_tasking_mode != tskm_immediate_exec) {
5282 team->t.t_threads[f]->th.th_task_team = NULL;
5284 __kmp_free_thread(team->t.t_threads[f]);
5285 team->t.t_threads[f] = NULL;
5287#if KMP_NESTED_HOT_TEAMS
5292 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5293 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5294 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5295 for (int b = 0; b < bs_last_barrier; ++b) {
5296 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5297 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5299 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5304 team->t.t_nproc = new_nproc;
5306 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5307 __kmp_reinitialize_team(team, new_icvs,
5308 root->r.r_uber_thread->th.th_ident);
5311 for (f = 0; f < new_nproc; ++f) {
5312 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5317 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5318 team->t.t_threads[0], team));
5320 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5323 for (f = 0; f < team->t.t_nproc; f++) {
5324 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5325 team->t.t_threads[f]->th.th_team_nproc ==
5330 if (do_place_partition) {
5331 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5332#if KMP_AFFINITY_SUPPORTED
5333 __kmp_partition_places(team);
5337#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5338 kmp_affin_mask_t *old_mask;
5339 if (KMP_AFFINITY_CAPABLE()) {
5340 KMP_CPU_ALLOC(old_mask);
5345 KA_TRACE(20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc));
5347 int old_nproc = team->t.t_nproc;
5348 team->t.t_size_changed = 1;
5350#if KMP_NESTED_HOT_TEAMS
5351 int avail_threads = hot_teams[level].hot_team_nth;
5352 if (new_nproc < avail_threads)
5353 avail_threads = new_nproc;
5354 kmp_info_t **other_threads = team->t.t_threads;
5355 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5359 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5360 for (b = 0; b < bs_last_barrier; ++b) {
5361 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5362 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5364 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5368 if (hot_teams[level].hot_team_nth >= new_nproc) {
5371 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5372 team->t.t_nproc = new_nproc;
5376 team->t.t_nproc = hot_teams[level].hot_team_nth;
5377 hot_teams[level].hot_team_nth = new_nproc;
5379 if (team->t.t_max_nproc < new_nproc) {
5381 __kmp_reallocate_team_arrays(team, new_nproc);
5382 __kmp_reinitialize_team(team, new_icvs, NULL);
5385#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5391 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5395 for (f = team->t.t_nproc; f < new_nproc; f++) {
5396 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5397 KMP_DEBUG_ASSERT(new_worker);
5398 team->t.t_threads[f] = new_worker;
5401 KA_TRACE(20, ("__kmp_allocate_team: team %d init T#%d arrived: "
5402 "join=%llu, plain=%llu\n",
5403 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5404 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5405 team->t.t_bar[bs_plain_barrier].b_arrived));
5409 kmp_balign_t *balign = new_worker->th.th_bar;
5410 for (b = 0; b < bs_last_barrier; ++b) {
5411 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5412 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5413 KMP_BARRIER_PARENT_FLAG);
5415 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5421#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5422 if (KMP_AFFINITY_CAPABLE()) {
5424 __kmp_set_system_affinity(old_mask, TRUE);
5425 KMP_CPU_FREE(old_mask);
5428#if KMP_NESTED_HOT_TEAMS
5431 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5434 __kmp_add_threads_to_team(team, new_nproc);
5438 __kmp_initialize_team(team, new_nproc, new_icvs,
5439 root->r.r_uber_thread->th.th_ident);
5442 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5443 for (f = 0; f < team->t.t_nproc; ++f)
5444 __kmp_initialize_info(team->t.t_threads[f], team, f,
5445 __kmp_gtid_from_tid(f, team));
5453 for (f = old_nproc; f < team->t.t_nproc; ++f)
5454 team->t.t_threads[f]->th.th_task_state =
5455 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5458 kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
5459 for (f = old_nproc; f < team->t.t_nproc; ++f)
5460 team->t.t_threads[f]->th.th_task_state = old_state;
5464 for (f = 0; f < team->t.t_nproc; ++f) {
5465 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5466 team->t.t_threads[f]->th.th_team_nproc ==
5471 if (do_place_partition) {
5472 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5473#if KMP_AFFINITY_SUPPORTED
5474 __kmp_partition_places(team);
5479 kmp_info_t *master = team->t.t_threads[0];
5480 if (master->th.th_teams_microtask) {
5481 for (f = 1; f < new_nproc; ++f) {
5483 kmp_info_t *thr = team->t.t_threads[f];
5484 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5485 thr->th.th_teams_level = master->th.th_teams_level;
5486 thr->th.th_teams_size = master->th.th_teams_size;
5489#if KMP_NESTED_HOT_TEAMS
5493 for (f = 1; f < new_nproc; ++f) {
5494 kmp_info_t *thr = team->t.t_threads[f];
5496 kmp_balign_t *balign = thr->th.th_bar;
5497 for (b = 0; b < bs_last_barrier; ++b) {
5498 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5499 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5501 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5509 __kmp_alloc_argv_entries(argc, team, TRUE);
5510 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5514 KF_TRACE(10, (" hot_team = %p\n", team));
5517 if (__kmp_tasking_mode != tskm_immediate_exec) {
5518 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5519 "task_team[1] = %p after reinit\n",
5520 team->t.t_task_team[0], team->t.t_task_team[1]));
5525 __ompt_team_assign_id(team, ompt_parallel_data);
5535 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5538 if (team->t.t_max_nproc >= max_nproc) {
5540 __kmp_team_pool = team->t.t_next_pool;
5542 if (max_nproc > 1 &&
5543 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5545 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5550 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5552 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5553 "task_team[1] %p to NULL\n",
5554 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5555 team->t.t_task_team[0] = NULL;
5556 team->t.t_task_team[1] = NULL;
5559 __kmp_alloc_argv_entries(argc, team, TRUE);
5560 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5563 KA_TRACE(20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5564 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5567 for (b = 0; b < bs_last_barrier; ++b) {
5568 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5570 team->t.t_bar[b].b_master_arrived = 0;
5571 team->t.t_bar[b].b_team_arrived = 0;
5576 team->t.t_proc_bind = new_proc_bind;
5578 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id));
5582 __ompt_team_assign_id(team, ompt_parallel_data);
5594 team = __kmp_reap_team(team);
5595 __kmp_team_pool = team;
5600 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5603 team->t.t_max_nproc = max_nproc;
5604 if (max_nproc > 1 &&
5605 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5607 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5612 __kmp_allocate_team_arrays(team, max_nproc);
5614 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5615 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5617 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5618 "%p to NULL\n",
5619 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5620 team->t.t_task_team[0] = NULL;
5622 team->t.t_task_team[1] = NULL;
5625 if (__kmp_storage_map) {
5626 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5630 __kmp_alloc_argv_entries(argc, team, FALSE);
5631 team->t.t_argc = argc;
5634 KA_TRACE(20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5635 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5638 for (b = 0; b < bs_last_barrier; ++b) {
5639 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5641 team->t.t_bar[b].b_master_arrived = 0;
5642 team->t.t_bar[b].b_team_arrived = 0;
5647 team->t.t_proc_bind = new_proc_bind;
5650 __ompt_team_assign_id(team, ompt_parallel_data);
5651 team->t.ompt_serialized_team_info = NULL;
5656 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id));
5667void __kmp_free_team(kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5670 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id));
5674 KMP_DEBUG_ASSERT(root);
5675 KMP_DEBUG_ASSERT(team);
5676 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5677 KMP_DEBUG_ASSERT(team->t.t_threads);
5679 int use_hot_team = team == root->r.r_hot_team;
5680#if KMP_NESTED_HOT_TEAMS
5683 level = team->t.t_active_level - 1;
5684 if (master->th.th_teams_microtask) {
5685 if (master->th.th_teams_size.nteams > 1) {
5689 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5690 master->th.th_teams_level == team->t.t_level) {
5696 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5698 if (level < __kmp_hot_teams_max_level) {
5699 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5706 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5709 team->t.t_copyin_counter = 0;
5714 if (!use_hot_team) {
5715 if (__kmp_tasking_mode != tskm_immediate_exec) {
5717 for (f = 1; f < team->t.t_nproc; ++f) {
5718 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5719 kmp_info_t *th = team->t.t_threads[f];
5720 volatile kmp_uint32 *state = &th->th.th_reap_state;
5721 while (*state != KMP_SAFE_TO_REAP) {
5725 if (!__kmp_is_thread_alive(th, &ecode)) {
5726 *state = KMP_SAFE_TO_REAP;
5731 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5732 if (fl.is_sleeping())
5733 fl.resume(__kmp_gtid_from_thread(th));
5740 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5741 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5742 if (task_team != NULL) {
5743 for (f = 0; f < team->t.t_nproc; ++f) {
5744 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5745 team->t.t_threads[f]->th.th_task_team = NULL;
5749 KA_TRACE(20, ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5750 __kmp_get_gtid(), task_team, team->t.t_id));
5751#if KMP_NESTED_HOT_TEAMS
5752 __kmp_free_task_team(master, task_team);
5754 team->t.t_task_team[tt_idx] = NULL;
5760 team->t.t_parent = NULL;
5761 team->t.t_level = 0;
5762 team->t.t_active_level = 0;
5765 for (f = 1; f < team->t.t_nproc; ++f) {
5766 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5767 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5768 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5771 __kmp_free_thread(team->t.t_threads[f]);
5774 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5777 team->t.b->go_release();
5778 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5779 for (f = 1; f < team->t.t_nproc; ++f) {
5780 if (team->t.b->sleep[f].sleep) {
5781 __kmp_atomic_resume_64(
5782 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5783 (kmp_atomic_flag_64<> *)NULL);
5788 for (int f = 1; f < team->t.t_nproc; ++f) {
5789 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5795 for (f = 1; f < team->t.t_nproc; ++f) {
5796 team->t.t_threads[f] = NULL;
5799 if (team->t.t_max_nproc > 1 &&
5800 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5801 distributedBarrier::deallocate(team->t.b);
5806 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5807 __kmp_team_pool = (volatile kmp_team_t *)team;
5810 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5811 team->t.t_threads[1]->th.th_cg_roots);
5812 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5814 for (f = 1; f < team->t.t_nproc; ++f) {
5815 kmp_info_t *thr = team->t.t_threads[f];
5816 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5817 thr->th.th_cg_roots->cg_root == thr);
5819 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5820 thr->th.th_cg_roots = tmp->up;
5821 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
5822 " up to node %p. cg_nthreads was %d\n",
5823 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5824 int i = tmp->cg_nthreads--;
5829 if (thr->th.th_cg_roots)
5830 thr->th.th_current_task->td_icvs.thread_limit =
5831 thr->th.th_cg_roots->cg_thread_limit;
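// Destroy a team descriptor taken off the team pool: free the per-team
// arrays and any heap-allocated argv block, returning the next team in the
// pool chain.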
5840kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5841 kmp_team_t *next_pool = team->t.t_next_pool;
5843 KMP_DEBUG_ASSERT(team);
5844 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5845 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5846 KMP_DEBUG_ASSERT(team->t.t_threads);
5847 KMP_DEBUG_ASSERT(team->t.t_argv);
5852 __kmp_free_team_arrays(team);
5853 if (team->t.t_argv != &team->t.t_inline_argv[0])
5854 __kmp_free((void *)team->t.t_argv);
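// Return a worker thread to the global thread pool. The pool is kept sorted
// by gtid so the lowest-numbered threads are handed out first on reuse.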
5886void __kmp_free_thread(kmp_info_t *this_th) {
5890 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5891 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5893 KMP_DEBUG_ASSERT(this_th);
5898 kmp_balign_t *balign = this_th->th.th_bar;
5899 for (b = 0; b < bs_last_barrier; ++b) {
5900 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5901 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5902 balign[b].bb.team = NULL;
5903 balign[b].bb.leaf_kids = 0;
5905 this_th->th.th_task_state = 0;
5906 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5909 TCW_PTR(this_th->th.th_team, NULL);
5910 TCW_PTR(this_th->th.th_root, NULL);
5911 TCW_PTR(this_th->th.th_dispatch, NULL);
5913 while (this_th->th.th_cg_roots) {
5914 this_th->th.th_cg_roots->cg_nthreads--;
5915 KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5916 " %p of thread %p to %d\n",
5917 this_th, this_th->th.th_cg_roots,
5918 this_th->th.th_cg_roots->cg_root,
5919 this_th->th.th_cg_roots->cg_nthreads));
5920 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5921 if (tmp->cg_root == this_th) {
5922 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5924 KA_TRACE(5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5925 this_th->th.th_cg_roots = tmp->up;
5928 if (tmp->cg_nthreads == 0) {
5931 this_th->th.th_cg_roots = NULL;
5941 __kmp_free_implicit_task(this_th);
5942 this_th->th.th_current_task = NULL;
5946 gtid = this_th->th.th_info.ds.ds_gtid;
5947 if (__kmp_thread_pool_insert_pt != NULL) {
5948 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5949 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5950 __kmp_thread_pool_insert_pt = NULL;
5959 if (__kmp_thread_pool_insert_pt != NULL) {
5960 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5962 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5964 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5965 scan = &((*scan)->th.th_next_pool))
5970 TCW_PTR(this_th->th.th_next_pool, *scan);
5971 __kmp_thread_pool_insert_pt = *scan = this_th;
5972 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5973 (this_th->th.th_info.ds.ds_gtid <
5974 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5975 TCW_4(this_th->th.th_in_pool, TRUE);
5976 __kmp_suspend_initialize_thread(this_th);
5977 __kmp_lock_suspend_mx(this_th);
5978 if (this_th->th.th_active == TRUE) {
5979 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5980 this_th->th.th_active_in_pool = TRUE;
5984 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5987 __kmp_unlock_suspend_mx(this_th);
5989 TCW_4(__kmp_nth, __kmp_nth - 1);
5991#ifdef KMP_ADJUST_BLOCKTIME
5994 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5995 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5996 if (__kmp_nth <= __kmp_avail_proc) {
5997 __kmp_zero_bt = FALSE;
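// Entry point of an OpenMP worker thread: wait at the fork barrier for work,
// run the team's microtask via t_invoke, pass the join barrier, and repeat
// until the library signals global shutdown.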
6007void *__kmp_launch_thread(kmp_info_t *this_thr) {
6008#if OMP_PROFILING_SUPPORT
6009 ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
6011 if (ProfileTraceFile)
6012 llvm::timeTraceProfilerInitialize(500, "libomptarget");
6015 int gtid = this_thr->th.th_info.ds.ds_gtid;
6017 kmp_team_t **volatile pteam;
6020 KA_TRACE(10, (
"__kmp_launch_thread: T#%d start\n", gtid));
6022 if (__kmp_env_consistency_check) {
6023 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
6027 if (ompd_state & OMPD_ENABLE_BP)
6028 ompd_bp_thread_begin();
6032 ompt_data_t *thread_data = nullptr;
6033 if (ompt_enabled.enabled) {
6034 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6035 *thread_data = ompt_data_none;
6037 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6038 this_thr->th.ompt_thread_info.wait_id = 0;
6039 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6040 this_thr->th.ompt_thread_info.parallel_flags = 0;
6041 if (ompt_enabled.ompt_callback_thread_begin) {
6042 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6043 ompt_thread_worker, thread_data);
6045 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6050 while (!TCR_4(__kmp_global.g.g_done)) {
6051 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6055 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6058 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6061 if (ompt_enabled.enabled) {
6062 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6066 pteam = &this_thr->th.th_team;
6069 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6071 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6074 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6075 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6076 (*pteam)->t.t_pkfn));
6078 updateHWFPControl(*pteam);
6081 if (ompt_enabled.enabled) {
6082 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6086 rc = (*pteam)->t.t_invoke(gtid);
6090 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6091 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6092 (*pteam)->t.t_pkfn));
6095 if (ompt_enabled.enabled) {
6097 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6099 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6103 __kmp_join_barrier(gtid);
6106 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
6109 if (ompd_state & OMPD_ENABLE_BP)
6110 ompd_bp_thread_end();
6114 if (ompt_enabled.ompt_callback_thread_end) {
6115 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6119 this_thr->th.th_task_team = NULL;
6121 __kmp_common_destroy_gtid(gtid);
6123 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6126#if OMP_PROFILING_SUPPORT
6127 llvm::timeTraceProfilerFinishThread();
6134void __kmp_internal_end_dest(void *specific_gtid) {
6137 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6139 KA_TRACE(30, (
"__kmp_internal_end_dest: T#%d\n", gtid));
6143 __kmp_internal_end_thread(gtid);
6146#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6148__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6149 __kmp_internal_end_atexit();
6156void __kmp_internal_end_atexit(void) {
6157 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
6181 __kmp_internal_end_library(-1);
6183 __kmp_close_console();
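// Reclaim a worker thread being removed from the runtime: release it from the
// fork barrier, join the underlying OS thread, and free its per-thread
// resources (implicit task, bget memory, affinity mask, serial team, ...).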
6187static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6192 KMP_DEBUG_ASSERT(thread != NULL);
6194 gtid = thread->th.th_info.ds.ds_gtid;
6197 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
6200 KA_TRACE(20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid));
6202 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6204 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6206 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
6210 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6212 __kmp_release_64(&flag);
6217 __kmp_reap_worker(thread);
6229 if (thread->th.th_active_in_pool) {
6230 thread->th.th_active_in_pool = FALSE;
6231 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6232 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6236 __kmp_free_implicit_task(thread);
6240 __kmp_free_fast_memory(thread);
6243 __kmp_suspend_uninitialize_thread(thread);
6245 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6246 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6251#ifdef KMP_ADJUST_BLOCKTIME
6254 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6255 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6256 if (__kmp_nth <= __kmp_avail_proc) {
6257 __kmp_zero_bt = FALSE;
6263 if (__kmp_env_consistency_check) {
6264 if (thread->th.th_cons) {
6265 __kmp_free_cons_stack(thread->th.th_cons);
6266 thread->th.th_cons = NULL;
6270 if (thread->th.th_pri_common != NULL) {
6271 __kmp_free(thread->th.th_pri_common);
6272 thread->th.th_pri_common = NULL;
6275 if (thread->th.th_task_state_memo_stack != NULL) {
6276 __kmp_free(thread->th.th_task_state_memo_stack);
6277 thread->th.th_task_state_memo_stack = NULL;
6281 if (thread->th.th_local.bget_data != NULL) {
6282 __kmp_finalize_bget(thread);
6286#if KMP_AFFINITY_SUPPORTED
6287 if (thread->th.th_affin_mask != NULL) {
6288 KMP_CPU_FREE(thread->th.th_affin_mask);
6289 thread->th.th_affin_mask = NULL;
6293#if KMP_USE_HIER_SCHED
6294 if (thread->th.th_hier_bar_data != NULL) {
6295 __kmp_free(thread->th.th_hier_bar_data);
6296 thread->th.th_hier_bar_data = NULL;
6300 __kmp_reap_team(thread->th.th_serial_team);
6301 thread->th.th_serial_team = NULL;
6308static void __kmp_itthash_clean(kmp_info_t *th) {
6310 if (__kmp_itt_region_domains.count > 0) {
6311 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6312 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6314 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6315 __kmp_thread_free(th, bucket);
6320 if (__kmp_itt_barrier_domains.count > 0) {
6321 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6322 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6324 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6325 __kmp_thread_free(th, bucket);
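// Common shutdown path shared by __kmp_internal_end_library() and
// __kmp_internal_end_thread(): mark the runtime done, reap the monitor and
// all pooled threads and teams, and clear the global init flags. Callers hold
// __kmp_initz_lock and __kmp_forkjoin_lock.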
6333static void __kmp_internal_end(void) {
6337 __kmp_unregister_library();
6344 __kmp_reclaim_dead_roots();
6348 for (i = 0; i < __kmp_threads_capacity; i++)
6350 if (__kmp_root[i]->r.r_active)
6353 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6355 if (i < __kmp_threads_capacity) {
6367 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6368 if (TCR_4(__kmp_init_monitor)) {
6369 __kmp_reap_monitor(&__kmp_monitor);
6370 TCW_4(__kmp_init_monitor, 0);
6372 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6373 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6379 for (i = 0; i < __kmp_threads_capacity; i++) {
6380 if (__kmp_root[i]) {
6383 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6392 while (__kmp_thread_pool != NULL) {
6394 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6395 __kmp_thread_pool = thread->th.th_next_pool;
6397 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6398 thread->th.th_next_pool = NULL;
6399 thread->th.th_in_pool = FALSE;
6400 __kmp_reap_thread(thread, 0);
6402 __kmp_thread_pool_insert_pt = NULL;
6405 while (__kmp_team_pool != NULL) {
6407 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6408 __kmp_team_pool = team->t.t_next_pool;
6410 team->t.t_next_pool = NULL;
6411 __kmp_reap_team(team);
6414 __kmp_reap_task_teams();
6421 for (i = 0; i < __kmp_threads_capacity; i++) {
6422 kmp_info_t *thr = __kmp_threads[i];
6423 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6428 for (i = 0; i < __kmp_threads_capacity; ++i) {
6435 TCW_SYNC_4(__kmp_init_common, FALSE);
6437 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6445 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6446 if (TCR_4(__kmp_init_monitor)) {
6447 __kmp_reap_monitor(&__kmp_monitor);
6448 TCW_4(__kmp_init_monitor, 0);
6450 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6451 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6454 TCW_4(__kmp_init_gtid, FALSE);
6463void __kmp_internal_end_library(int gtid_req) {
6470 if (__kmp_global.g.g_abort) {
6471 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6475 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6476 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6481 if (TCR_4(__kmp_init_hidden_helper) &&
6482 !TCR_4(__kmp_hidden_helper_team_done)) {
6483 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6485 __kmp_hidden_helper_main_thread_release();
6487 __kmp_hidden_helper_threads_deinitz_wait();
6493 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6495 KA_TRACE(10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6496 if (gtid == KMP_GTID_SHUTDOWN) {
6497 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6498 "already shutdown\n"));
6500 } else if (gtid == KMP_GTID_MONITOR) {
6501 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6502 "registered, or system shutdown\n"));
6504 } else if (gtid == KMP_GTID_DNE) {
6505 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
6508 } else if (KMP_UBER_GTID(gtid)) {
6510 if (__kmp_root[gtid]->r.r_active) {
6511 __kmp_global.g.g_abort = -1;
6512 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6513 __kmp_unregister_library();
6515 KA_TRACE(10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid));
6519 __kmp_itthash_clean(__kmp_threads[gtid]);
6522 KA_TRACE(10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6523 __kmp_unregister_root_current_thread(gtid);
6530#ifdef DUMP_DEBUG_ON_EXIT
6531 if (__kmp_debug_buf)
6532 __kmp_dump_debug_buffer();
6537 __kmp_unregister_library();
6542 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6545 if (__kmp_global.g.g_abort) {
6546 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6548 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6551 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6552 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6561 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6564 __kmp_internal_end();
6566 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6567 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6569 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6571#ifdef DUMP_DEBUG_ON_EXIT
6572 if (__kmp_debug_buf)
6573 __kmp_dump_debug_buffer();
6577 __kmp_close_console();
6580 __kmp_fini_allocator();
6584void __kmp_internal_end_thread(int gtid_req) {
6593 if (__kmp_global.g.g_abort) {
6594 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6598 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6599 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6604 if (TCR_4(__kmp_init_hidden_helper) &&
6605 !TCR_4(__kmp_hidden_helper_team_done)) {
6606 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6608 __kmp_hidden_helper_main_thread_release();
6610 __kmp_hidden_helper_threads_deinitz_wait();
6617 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6619 KA_TRACE(10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6620 if (gtid == KMP_GTID_SHUTDOWN) {
6621 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6622 "already shutdown\n"));
6624 } else if (gtid == KMP_GTID_MONITOR) {
6625 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6626 "registered, or system shutdown\n"));
6628 } else if (gtid == KMP_GTID_DNE) {
6629 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
6633 } else if (KMP_UBER_GTID(gtid)) {
6635 if (__kmp_root[gtid]->r.r_active) {
6636 __kmp_global.g.g_abort = -1;
6637 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6639 KA_TRACE(10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid));
6643 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid));
6645 __kmp_unregister_root_current_thread(gtid);
6649 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6652 __kmp_threads[gtid]->th.th_task_team = NULL;
6656 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid));
6662 if (__kmp_pause_status != kmp_hard_paused)
6666 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6671 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6674 if (__kmp_global.g.g_abort) {
6675 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6677 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6680 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6681 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6692 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6694 for (i = 0; i < __kmp_threads_capacity; ++i) {
6695 if (KMP_UBER_GTID(i)) {
6698 KA_TRACE(10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6699 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6700 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6707 __kmp_internal_end();
6709 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6710 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6712 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6714#ifdef DUMP_DEBUG_ON_EXIT
6715 if (__kmp_debug_buf)
6716 __kmp_dump_debug_buffer();
6723static long __kmp_registration_flag = 0;
6725static char *__kmp_registration_str = NULL;
6728static inline char *__kmp_reg_status_name() {
6734#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6735 return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
6738 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6742#if defined(KMP_USE_SHM)
6744char *temp_reg_status_file_name = nullptr;
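// Publish a unique "<flag address>-<flag value>-<library file>" string for
// this runtime instance, either in a shared-memory segment (KMP_USE_SHM) or
// in an environment variable, and check whether another OpenMP runtime is
// already registered in this process. A live duplicate triggers the
// DuplicateLibrary fatal error unless KMP_DUPLICATE_LIB_OK is set.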
6747void __kmp_register_library_startup(void) {
6749 char *name = __kmp_reg_status_name();
6755#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6756 __kmp_initialize_system_tick();
6758 __kmp_read_system_time(&time.dtime);
6759 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6760 __kmp_registration_str =
6761 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6762 __kmp_registration_flag, KMP_LIBRARY_FILE);
6764 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6765 __kmp_registration_str));
6771#if defined(KMP_USE_SHM)
6772 char *shm_name = __kmp_str_format("/%s", name);
6773 int shm_preexist = 0;
6775 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6776 if ((fd1 == -1) && (errno == EEXIST)) {
6779 fd1 = shm_open(shm_name, O_RDWR, 0666);
6782 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0), __kmp_msg_null);
6788 } else if (fd1 == -1) {
6793 char *temp_file_name = __kmp_str_format("/tmp/%sXXXXXX", name);
6794 fd1 = mkstemp(temp_file_name);
6797 __kmp_fatal(KMP_MSG(FunctionError, "Can't open TEMP"), KMP_ERR(errno), __kmp_msg_null);
6800 temp_reg_status_file_name = temp_file_name;
6802 if (shm_preexist == 0) {
6804 if (ftruncate(fd1, SHM_SIZE) == -1) {
6806 __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"),
6807 KMP_ERR(errno), __kmp_msg_null);
6811 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6812 if (data1 == MAP_FAILED) {
6814 __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno), __kmp_msg_null);
6817 if (shm_preexist == 0) {
6818 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6821 value = __kmp_str_format("%s", data1);
6822 munmap(data1, SHM_SIZE);
6826 __kmp_env_set(name, __kmp_registration_str, 0);
6828 value = __kmp_env_get(name);
6831 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6838 char *flag_addr_str = NULL;
6839 char *flag_val_str = NULL;
6840 char const *file_name = NULL;
6841 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6842 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6845 unsigned long *flag_addr = 0;
6846 unsigned long flag_val = 0;
6847 KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
6848 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6849 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6853 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6867 file_name = "unknown library";
6872 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6873 if (!__kmp_str_match_true(duplicate_ok)) {
6875 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6876 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6878 KMP_INTERNAL_FREE(duplicate_ok);
6879 __kmp_duplicate_library_ok = 1;
6884#if defined(KMP_USE_SHM)
6886 shm_unlink(shm_name);
6889 __kmp_env_unset(name);
6893 KMP_DEBUG_ASSERT(0);
6897 KMP_INTERNAL_FREE((void *)value);
6898#if defined(KMP_USE_SHM)
6899 KMP_INTERNAL_FREE((void *)shm_name);
6902 KMP_INTERNAL_FREE((void *)name);
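// Undo __kmp_register_library_startup(): if the published registration value
// still belongs to this runtime, remove the shared-memory segment (or the
// temporary file / environment variable) and release the registration string.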
6906void __kmp_unregister_library(void) {
6908 char *name = __kmp_reg_status_name();
6911#if defined(KMP_USE_SHM)
6912 bool use_shm = true;
6913 char *shm_name = __kmp_str_format("/%s", name);
6914 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
6918 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6919 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6925 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6926 if (data1 != MAP_FAILED) {
6927 value = __kmp_str_format("%s", data1);
6928 munmap(data1, SHM_SIZE);
6932 value = __kmp_env_get(name);
6935 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6936 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6937 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6939#if defined(KMP_USE_SHM)
6941 shm_unlink(shm_name);
6943 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6944 unlink(temp_reg_status_file_name);
6947 __kmp_env_unset(name);
6951#if defined(KMP_USE_SHM)
6952 KMP_INTERNAL_FREE(shm_name);
6954 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6955 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6959 KMP_INTERNAL_FREE(__kmp_registration_str);
6960 KMP_INTERNAL_FREE(value);
6961 KMP_INTERNAL_FREE(name);
6963 __kmp_registration_flag = 0;
6964 __kmp_registration_str = NULL;
6971#if KMP_MIC_SUPPORTED
6973static void __kmp_check_mic_type() {
6974 kmp_cpuid_t cpuid_state = {0};
6975 kmp_cpuid_t *cs_p = &cpuid_state;
6976 __kmp_x86_cpuid(1, 0, cs_p);
6978 if ((cs_p->eax & 0xff0) == 0xB10) {
6979 __kmp_mic_type = mic2;
6980 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6981 __kmp_mic_type = mic3;
6983 __kmp_mic_type = non_mic;
6990static void __kmp_user_level_mwait_init() {
6991 struct kmp_cpuid buf;
6992 __kmp_x86_cpuid(7, 0, &buf);
6993 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6994 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6995 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6996 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6997 __kmp_umwait_enabled));
7000#ifndef AT_INTELPHIUSERMWAIT
7003#define AT_INTELPHIUSERMWAIT 10000
7008unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
7009unsigned long getauxval(unsigned long) { return 0; }
7011static void __kmp_user_level_mwait_init() {
7016 if (__kmp_mic_type == mic3) {
7017 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7018 if ((res & 0x1) || __kmp_user_level_mwait) {
7019 __kmp_mwait_enabled = TRUE;
7020 if (__kmp_user_level_mwait) {
7021 KMP_INFORM(EnvMwaitWarn);
7024 __kmp_mwait_enabled = FALSE;
7027 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7028 "__kmp_mwait_enabled = %d\n",
7029 __kmp_mic_type, __kmp_mwait_enabled));
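// One-time serial initialization: validate basic type sizes, create the
// global and atomic locks, read the environment, size the thread/root arrays,
// and register the initial (uber) root thread.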
7033static void __kmp_do_serial_initialize(void) {
7037 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
7039 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
7040 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
7041 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
7042 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
7043 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7053 __kmp_validate_locks();
7056 __kmp_init_allocator();
7062 if (__kmp_need_register_serial)
7063 __kmp_register_library_startup();
7066 if (TCR_4(__kmp_global.g.g_done)) {
7067 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7070 __kmp_global.g.g_abort = 0;
7071 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7074#if KMP_USE_ADAPTIVE_LOCKS
7075#if KMP_DEBUG_ADAPTIVE_LOCKS
7076 __kmp_init_speculative_stats();
7079#if KMP_STATS_ENABLED
7082 __kmp_init_lock(&__kmp_global_lock);
7083 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7084 __kmp_init_lock(&__kmp_debug_lock);
7085 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7086 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7087 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7088 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7089 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7090 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7091 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7092 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7093 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7094 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7095 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7096 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7097 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7098 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7099 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7101 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7103 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7107 __kmp_runtime_initialize();
7109#if KMP_MIC_SUPPORTED
7110 __kmp_check_mic_type();
7117 __kmp_abort_delay = 0;
7121 __kmp_dflt_team_nth_ub = __kmp_xproc;
7122 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7123 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7125 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7126 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7128 __kmp_max_nth = __kmp_sys_max_nth;
7129 __kmp_cg_max_nth = __kmp_sys_max_nth;
7130 __kmp_teams_max_nth = __kmp_xproc;
7131 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7132 __kmp_teams_max_nth = __kmp_sys_max_nth;
7137 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7139 __kmp_monitor_wakeups =
7140 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7141 __kmp_bt_intervals =
7142 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7145 __kmp_library = library_throughput;
7147 __kmp_static = kmp_sch_static_balanced;
7154#if KMP_FAST_REDUCTION_BARRIER
7155#define kmp_reduction_barrier_gather_bb ((int)1)
7156#define kmp_reduction_barrier_release_bb ((int)1)
7157#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7158#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7160 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7161 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7162 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7163 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7164 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7165#if KMP_FAST_REDUCTION_BARRIER
7166 if (i == bs_reduction_barrier) {
7168 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7169 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7170 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7171 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7175#if KMP_FAST_REDUCTION_BARRIER
7176#undef kmp_reduction_barrier_release_pat
7177#undef kmp_reduction_barrier_gather_pat
7178#undef kmp_reduction_barrier_release_bb
7179#undef kmp_reduction_barrier_gather_bb
7181#if KMP_MIC_SUPPORTED
7182 if (__kmp_mic_type == mic2) {
7184 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7185 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7187 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7188 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7190#if KMP_FAST_REDUCTION_BARRIER
7191 if (__kmp_mic_type == mic2) {
7192 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7193 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7200 __kmp_env_checks = TRUE;
7202 __kmp_env_checks = FALSE;
7206 __kmp_foreign_tp = TRUE;
7208 __kmp_global.g.g_dynamic = FALSE;
7209 __kmp_global.g.g_dynamic_mode = dynamic_default;
7211 __kmp_init_nesting_mode();
7213 __kmp_env_initialize(NULL);
7215#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7216 __kmp_user_level_mwait_init();
7220 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
7221 if (__kmp_str_match_true(val)) {
7222 kmp_str_buf_t buffer;
7223 __kmp_str_buf_init(&buffer);
7224 __kmp_i18n_dump_catalog(&buffer);
7225 __kmp_printf("%s", buffer.str);
7226 __kmp_str_buf_free(&buffer);
7228 __kmp_env_free(&val);
7231 __kmp_threads_capacity =
7232 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7234 __kmp_tp_capacity = __kmp_default_tp_capacity(
7235 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7240 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7241 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7242 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7243 __kmp_thread_pool = NULL;
7244 __kmp_thread_pool_insert_pt = NULL;
7245 __kmp_team_pool = NULL;
7252 size = (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity + CACHE_LINE;
7254 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
7255 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
7256 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7259 KMP_DEBUG_ASSERT(__kmp_all_nth == 0);
7261 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7266 gtid = __kmp_register_root(TRUE);
7267 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7268 KMP_ASSERT(KMP_UBER_GTID(gtid));
7269 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7273 __kmp_common_initialize();
7277 __kmp_register_atfork();
7280#if !KMP_DYNAMIC_LIB || \
7281 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7286 int rc = atexit(__kmp_internal_end_atexit);
7288 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc), __kmp_msg_null);
7294#if KMP_HANDLE_SIGNALS
7300 __kmp_install_signals(FALSE);
7303 __kmp_install_signals(TRUE);
7308 __kmp_init_counter++;
7310 __kmp_init_serial = TRUE;
7312 if (__kmp_settings) {
7316 if (__kmp_display_env || __kmp_display_env_verbose) {
7317 __kmp_env_print_2();
7326 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
7329void __kmp_serial_initialize(void) {
7330 if (__kmp_init_serial) {
7333 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7334 if (__kmp_init_serial) {
7335 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7338 __kmp_do_serial_initialize();
7339 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
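// Middle initialization: set up affinity, derive the default team size from
// the available processors, and propagate a changed default nproc to the
// root threads that are already registered.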
7342static void __kmp_do_middle_initialize(void) {
7344 int prev_dflt_team_nth;
7346 if (!__kmp_init_serial) {
7347 __kmp_do_serial_initialize();
7350 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7352 if (UNLIKELY(!__kmp_need_register_serial)) {
7355 __kmp_register_library_startup();
7360 prev_dflt_team_nth = __kmp_dflt_team_nth;
7362#if KMP_AFFINITY_SUPPORTED
7365 __kmp_affinity_initialize(__kmp_affinity);
7369 KMP_ASSERT(__kmp_xproc > 0);
7370 if (__kmp_avail_proc == 0) {
7371 __kmp_avail_proc = __kmp_xproc;
7377 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7378 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7383 if (__kmp_dflt_team_nth == 0) {
7384#ifdef KMP_DFLT_NTH_CORES
7386 __kmp_dflt_team_nth = __kmp_ncores;
7387 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7388 "__kmp_ncores (%d)\n",
7389 __kmp_dflt_team_nth));
7392 __kmp_dflt_team_nth = __kmp_avail_proc;
7393 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7394 "__kmp_avail_proc(%d)\n",
7395 __kmp_dflt_team_nth));
7399 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7400 __kmp_dflt_team_nth = KMP_MIN_NTH;
7402 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7403 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7406 if (__kmp_nesting_mode > 0)
7407 __kmp_set_nesting_mode_threads();
7411 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7413 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7418 for (i = 0; i < __kmp_threads_capacity; i++) {
7419 kmp_info_t *thread = __kmp_threads[i];
7422 if (thread->th.th_current_task->td_icvs.nproc != 0)
7425 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7430 KA_TRACE(20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7431 __kmp_dflt_team_nth));
7433#ifdef KMP_ADJUST_BLOCKTIME
7435 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7436 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7437 if (__kmp_nth > __kmp_avail_proc) {
7438 __kmp_zero_bt = TRUE;
7444 TCW_SYNC_4(__kmp_init_middle, TRUE);
7446 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
7449void __kmp_middle_initialize(void) {
7450 if (__kmp_init_middle) {
7453 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7454 if (__kmp_init_middle) {
7455 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7458 __kmp_do_middle_initialize();
7459 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
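// Last initialization step before the first parallel region: finish middle
// init if necessary, capture the x87/MXCSR state on x86, install signal
// handlers, and pick the dynamic adjustment mode.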
7462void __kmp_parallel_initialize(void) {
7463 int gtid = __kmp_entry_gtid();
7466 if (TCR_4(__kmp_init_parallel))
7468 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7469 if (TCR_4(__kmp_init_parallel)) {
7470 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7475 if (TCR_4(__kmp_global.g.g_done)) {
7478 KA_TRACE(10, ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7479 __kmp_infinite_loop();
7485 if (!__kmp_init_middle) {
7486 __kmp_do_middle_initialize();
7488 __kmp_assign_root_init_mask();
7489 __kmp_resume_if_hard_paused();
7492 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7493 KMP_ASSERT(KMP_UBER_GTID(gtid));
7495#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7498 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7499 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7500 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7504#if KMP_HANDLE_SIGNALS
7506 __kmp_install_signals(TRUE);
7510 __kmp_suspend_initialize();
7512#if defined(USE_LOAD_BALANCE)
7513 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7514 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7517 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7518 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7522 if (__kmp_version) {
7523 __kmp_print_version_2();
7527 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7530 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7532 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before we initialize hidden helper
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check; this must not be placed before __kmp_parallel_initialize,
  // otherwise it would deadlock on __kmp_initz_lock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

#if KMP_AFFINITY_SUPPORTED
  // Initialize hidden helper affinity settings, if not done already.
  if (!__kmp_hh_affinity.flags.initialized)
    __kmp_affinity_initialize(__kmp_hh_affinity);
#endif

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Indicate that we're initializing the hidden helper team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait for the finish of initialization of hidden helper teams
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
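// The hidden helper team services hidden helper tasks (e.g. asynchronous
// offloading tasks). Its size comes from __kmp_hidden_helper_threads_num,
// whose definition appears near the end of this file (8 when the helper is
// enabled for the platform, 0/disabled otherwise).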
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
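// Summary: __kmp_invoke_task_func is the launch routine handed to
// __kmp_fork_call (see __kmp_teams_master below); it runs the compiler-
// outlined microtask team->t.t_pkfn for one team member, bracketing the call
// with consistency-check push/pop, ITT stack markers, OMPT implicit-task
// callbacks and the stats timers.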
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in a teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to thread limit stored when league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch league of teams now, but not let workers execute
// (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // The last parameter eliminates the join barrier, which would not work here
  // because worker threads are waiting in the fork barrier.
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}
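// Illustrative only: a user-level teams construct such as
//
//   #pragma omp teams num_teams(4) thread_limit(8)
//   { /* ... */ }
//
// is outlined by the compiler and, in this runtime, ends up executing
// __kmp_teams_master on each league primary thread, which re-forks the stored
// th_teams_microtask through __kmp_fork_call above. The pragma and clause
// values are an example, not something defined in this file.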
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
/* Set the requested number of threads for the next parallel region
   encountered by this thread. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
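// Illustrative only: a num_threads clause, e.g.
//   #pragma omp parallel num_threads(4)
// is lowered by the compiler to a runtime call (typically the
// __kmpc_push_num_threads entry point in kmp_csupport.cpp) that stores the
// request in th_set_nproc via this function; the next fork consumes and
// resets it. The example pragma is not taken from this file.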
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // adjust num_threads w/o warning as it is not a user setting:
    // num_threads = min(num_threads, nthreads-var, thread-limit-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    } // prevent team size from exceeding thread-limit-var
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads.
    // Store the new thread limit; the old limit is saved in the th_cg_roots
    // list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1;
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
/* Set the requested number of teams for the teams region and/or the number of
   threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // The OpenMP specification requires requested values to be positive, but
    // the user may pass anything, so check and fall back to 1.
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
/* OpenMP 5.1 variant: the num_teams clause may carry a lower and an upper
   bound. */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
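// Illustrative only: the OpenMP 5.1 form of the clause,
//   #pragma omp teams num_teams(2 : 8) thread_limit(16)
// supplies the lower/upper bounds handled above as num_teams_lb/num_teams_ub;
// the example values are not taken from this file.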
// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}
#ifdef USE_LOAD_BALANCE

// Return the number of worker threads actively spinning in the hot team, if
// we are at the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count the primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of threads used by the next
// parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this particular root (if we are at the outer parallel level), and the
  // currently executing thread (to become the primary thread) are available to
  // add to the new team, but are currently contributing to the system load.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once we set
    // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit, we should not
    // wind up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs. The real system load at this instant should be at least as
  // large as the #active omp threads that are available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}
#endif /* USE_LOAD_BALANCE */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated as a single block, so there
  // is no separate free for __kmp_root.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads and
     assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in a serial section of the
                                  top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
// Get team information common to all team API; returns NULL if not inside a
// teams construct.
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level;
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized (1 team of 1 thread)
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}
// Field specifiers recognized in OMP_AFFINITY_FORMAT / KMP_AFFINITY_FORMAT
// strings (see the %-field syntax in the OpenMP spec).
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L, N, A
  const char *long_name; // from spec e.g., nesting_level, num_threads
  char field_format; // how to format the field in printf style, e.g., 'd'
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
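// Illustrative only: with the table above, an affinity-format string such as
//   "team %t thread %n/%N bound to %A"
// expands to the team number, thread number, team size and affinity mask of
// the printing thread. The string itself is an example, not a default defined
// in this file.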
// Capture one %-field of the affinity format into field_buffer; returns the
// number of characters produced.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse the width of the field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8 digit number widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // Per the spec, print "undefined" for unknown field types
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Expand the affinity format string for thread gtid into buffer; returns the
// number of characters produced.
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put the field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put the literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Display the expanded affinity string for thread gtid on stdout.
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
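// These capture/display helpers back the OpenMP 5.0 affinity-display feature
// (OMP_DISPLAY_AFFINITY, omp_display_affinity(), omp_capture_affinity()); the
// entry points that forward to them live outside this file.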
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg;
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set the blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that the blocktime has been explicitly set */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
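// This is the common backend for the blocktime setters: the KMP_BLOCKTIME
// environment setting and the kmp_set_blocktime() extension eventually land
// here, and the value is clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME]
// before being stored per team.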
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct as the reduction method. Other methods are
  // chosen below based on what the compiler generated and on platform tuning.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deref) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION: if the team is serialized (team_size == 1), keep the
  // unsynchronized method (empty_reduce_block) regardless of the forced value.
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
// This function is called by the compiled code when it needs to query the
// reduction method that was selected.
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
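// The packed value combines the chosen reduction method with barrier
// information; judging from the shift above, __kmp_get_reduce_method() exposes
// only the method bits to the caller and drops the low byte.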
// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;
    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}

// Called via __kmpc_pause_resource. Returns 0 if successful.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      return 1; // runtime is not paused, so cannot resume
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      return 1; // already paused
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      return 1; // already paused
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    return 1; // invalid level
  }
}
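// These routines back the OpenMP 5.0 pause API: omp_pause_resource() and
// omp_pause_resource_all() reach __kmp_pause_resource() through the
// corresponding entry points, with a soft pause putting worker threads to
// sleep and a hard pause tearing the runtime down via
// __kmp_internal_end_thread(-1).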
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
// The team size is changing, so the distributed barrier must be modified.
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust the
  // size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are not part of the team (inactive slots).
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If the thread is still transitioning into the team, wait for it.
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in use now; move it to the "leaving" state.
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  // Workers should see the transition state and move to 0; they may need to
  // be woken up first if blocktime is not infinite.
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
// Add the threads back to the team.
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  KMP_DEBUG_ASSERT(team);
  // Setting th_used_in_team to 3 tells the thread that it should transition
  // itself back into the team; if blocktime isn't infinite the thread may be
  // sleeping, so send a resume to wake it up.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // Wait until all threads have moved into the team and set th_used_in_team
  // to 1.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}
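// th_used_in_team acts as a small state machine in the two routines above:
// 0 = not in the team and 1 = in the team; judging from the transitions here,
// 2 marks a thread being released from the team and 3 a thread that has been
// told to rejoin, with the workers completing the 2->0 and 3->1 transitions
// themselves.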
// Globals and functions for hidden helper tasks
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // Explicit synchronization on all hidden helper threads, in case a regular
  // thread pushes a hidden helper task before every helper has been released
  // at least once after team creation.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // The main helper thread releases the initial thread and then waits for a
  // signal before waking up the worker helpers.
  if (__kmpc_master(nullptr, *gtid)) {
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  __kmp_hidden_helper_threads_deinitz_release();
}
// Allocate and zero the per-level thread counts used by nesting mode.
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
// Set the number of threads for the top levels of nesting based on the
// hardware topology.
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--;
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) { // don't lose nesting levels
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable guess
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max active levels was set, set nesting mode levels to the same value
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
// Empty symbols to export when the corresponding feature is disabled.
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif
#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;
#endif
#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif