#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512
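// __kmpc_begin: if KMP_INITIAL_THREAD_BIND is set to a true value, run middle
// initialization eagerly and bind the initial thread; otherwise the usual
// lazy startup applies unless __kmp_ignore_mppbeg() tells us to skip it.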
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));

  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);

#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);

  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
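// Parallel-range filtering (__kmp_par_range): loc->psource has the form
// ";file;routine;line;col;;".  The code below extracts the file name, routine
// name and line number from that string and checks them against the
// KMP_PAR_RANGE filter to decide whether this parallel region may fork.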
  if (__kmp_par_range == 0) {

  semi2 = strchr(semi2, ';');

  semi2 = strchr(semi2 + 1, ';');

  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {

    if ((*name == '/') || (*name == ';')) {

    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;

  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;

  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;

    return __kmp_par_range < 0;

  return __kmp_entry_thread()->th.th_root->r.r_active;
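// __kmpc_push_num_threads / __kmpc_push_proc_bind: stash the num_threads
// clause value (resp. proc_bind policy) on the calling thread so the next
// fork picks it up.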
                            kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);

    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);

  va_start(ap, microtask);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    kmp_team_t *parent_team = master_th->th.th_team;
    ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      ompt_frame = &(lwt->ompt_task_info.frame);

      int tid = __kmp_tid_from_gtid(gtid);
      ompt_frame = &(
          parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);

    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(gtid);

#if INCLUDE_SSC_MARKS

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,

#if INCLUDE_SSC_MARKS

  __kmp_join_call(loc, gtid

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);

    KMP_POP_PARTITIONED_TIMER();
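// __kmpc_push_num_teams / __kmpc_push_num_teams_51: record the num_teams and
// thread_limit clause values for the upcoming teams construct.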
                          kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);

                             kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];

  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);

    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);

  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level;

  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);

  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master,
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);

    KMP_POP_PARTITIONED_TIMER();
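// __kmpc_invoke_task_func: thin wrapper that runs the outlined microtask for
// the calling thread via __kmp_invoke_task_func.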
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

  __kmp_assert_valid_gtid(global_tid);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_serialized_parallel(loc, global_tid);
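// __kmpc_end_serialized_parallel: pop one level of serialized-parallel
// nesting and restore the enclosing team, dispatch buffers, task state and
// ICVs that were saved when the serialized region was entered.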
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);

    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));

    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;

  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);

  dispatch_private_info_t *disp_buffer =
      serial_team->t.t_dispatch->th_disp_buffer;
  serial_team->t.t_dispatch->th_disp_buffer =
      serial_team->t.t_dispatch->th_disp_buffer->next;
  __kmp_free(disp_buffer);

  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);

    __kmp_pop_current_task_from_thread(this_thr);

    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {

      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);

  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
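// __kmpc_flush: issue a memory fence strong enough for OpenMP flush
// semantics; on x86 the explicit mfence is only executed when the CPU
// supports SSE2.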
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)

  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);

  if (!__kmp_cpuinfo.sse2) {

#elif KMP_COMPILER_MSVC

  __sync_synchronize();

#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \

#error Unknown or unsupported architecture

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
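// __kmpc_barrier: explicit barrier.  The ident is validated when consistency
// checking is on, the OMPT enter frame is published, and the thread then
// waits on the team's plain barrier.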
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);

    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL

  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;

    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
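// With KMP_USE_DYNAMIC_LOCK, a named critical section either stores a direct
// lock tag in place or points at an indirect lock.  __kmp_init_indirect_csptr
// races to install a freshly allocated indirect lock and destroys its copy if
// another thread won the race.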
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));

  __kmp_itt_critical_creating(ilk->lock, loc);

  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);

    __kmp_itt_critical_destroyed(ilk->lock);

  KMP_DEBUG_ASSERT(*lck != NULL);
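// Inlined test-and-set lock fast path: acquire spins (with yield and backoff)
// until the compare-and-store of the busy value succeeds, the test variant
// tries exactly once, and release is a single atomic store of the free value.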
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD_SPIN(spins); \
        __kmp_spin_backoff(&backoff); \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    KMP_FSYNC_ACQUIRED(l); \

#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \

#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
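// Inlined futex lock fast path (Linux only): the poll word holds the owner's
// gtid code, with the low bit used to flag contention; acquire falls back to
// futex(FUTEX_WAIT) when contended and release issues futex(FUTEX_WAKE) only
// if the waiter bit was set.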
#include <sys/syscall.h>
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1

#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         poll_val | KMP_LOCK_BUSY(1, futex))) { \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
    KMP_FSYNC_ACQUIRED(ftx); \
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    KMP_YIELD_OVERSUB(); \
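// Without dynamic locks, the first thread to enter a named critical section
// allocates and installs a user lock for it; threads that lose the
// installation race free their lock and use the winner's.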
static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);

    __kmp_itt_critical_creating(lck);

    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

      __kmp_itt_critical_destroyed(lck);

      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
                      kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;

  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);
  __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;

    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#if KMP_USE_DYNAMIC_LOCK

static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))

  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
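// __ompt_get_mutex_impl_type: report which mutex implementation backs a lock
// to the OMPT tool, either from the dynamic lock tag (looking up indirect
// locks as needed) or, without dynamic locks, from __kmp_user_lock_kind.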
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  switch (KMP_EXTRACT_D_TAG(user_lock)) {
    return kmp_mutex_impl_queuing;
    return kmp_mutex_impl_spin;
  case locktag_rtm_spin:
    return kmp_mutex_impl_speculative;
    return kmp_mutex_impl_none;
  ilock = KMP_LOOKUP_I_LOCK(user_lock);

  switch (ilock->type) {
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
  case locktag_nested_futex:
  case locktag_ticket:
  case locktag_queuing:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
    return kmp_mutex_impl_none;

static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
    return kmp_mutex_impl_spin;
    return kmp_mutex_impl_queuing;
  case lk_rtm_queuing:
    return kmp_mutex_impl_speculative;
    return kmp_mutex_impl_none;
                                kmp_critical_name *crit, uint32_t hint) {
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  if (KMP_IS_D_LOCK(lockseq)) {
    KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                KMP_GET_D_TAG(lockseq));

    __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));

    __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;

      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);

#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);

      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);

    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));

    __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;

      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);

    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);

  KMP_POP_PARTITIONED_TIMER();
  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
                          kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);

    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);

    __kmp_itt_critical_releasing(lck);

#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);

      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);

    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);

    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);

    __kmp_itt_critical_releasing(lck);

    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  return (status != 0) ? 0 : 1;
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  if (__kmp_env_consistency_check) {

      __kmp_pop_sync(global_tid, ct_master, loc);
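// __kmpc_single: only the thread that wins __kmp_enter_single runs the block;
// OMPT sees ompt_work_single_executor for the winner and a begin/end pair of
// ompt_work_single_other for everyone else.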
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

    KMP_PUSH_PARTITIONED_TIMER(OMP_single);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));

      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);

      ompt_work_type = ompt_work_loop;
      ompt_work_type = ompt_work_sections;
      ompt_work_type = ompt_work_distribute;

      KMP_DEBUG_ASSERT(ompt_work_type);

    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());

void ompc_set_dynamic(int flag) {
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? true : false);

void ompc_set_nested(int flag) {
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();

  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);

size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();

  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,

void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);

size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
                                              char const *format) {
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);

  __kmp_str_buf_free(&capture_buf);
  return num_required;
void kmpc_set_stacksize(int arg) {
  __kmp_aux_set_stacksize(arg);

void kmpc_set_stacksize_s(size_t arg) {
  __kmp_aux_set_stacksize(arg);

void kmpc_set_blocktime(int arg) {
  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);

void kmpc_set_library(int arg) {
  __kmp_user_set_library((enum library_type)arg);

void kmpc_set_defaults(char const *str) {
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));

void kmpc_set_disp_num_buffers(int arg) {
  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
      arg <= KMP_MAX_DISP_NUM_BUFF) {
    __kmp_dispatch_num_buffers = arg;
int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_set_affinity_mask_proc(proc, mask);

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
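// __kmpc_copyprivate: the executing thread publishes its data pointer in the
// team's t_copypriv_data slot, everyone meets at a barrier, each thread runs
// cpy_func to copy the published data into its own variables, and a second
// barrier keeps the source alive until all copies have finished.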
                        void *cpy_data, void (*cpy_func)(void *, void *),

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

    (*cpy_func)(cpy_data, *data_ptr);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
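// Lock API.  With KMP_USE_DYNAMIC_LOCK the lock word itself encodes a direct
// lock or points at an indirect lock, and the nest-lock initializers map the
// chosen sequence onto its nestable counterpart; without dynamic locks these
// entry points go through the *_with_checks wrappers defined above.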
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);

    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);

static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
      seq == lockseq_rtm_spin || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;

    seq = lockseq_nested_tas;
    seq = lockseq_nested_futex;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    seq = lockseq_nested_drdpa;
    seq = lockseq_nested_queuing;

  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;

    lck = (kmp_user_lock_p)user_lock;

  __kmp_itt_lock_destroyed(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {

  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {

    __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {

  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {

    __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);

    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);

      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  // Release the lock; a "fast" path is used for TAS locks on the
  // architectures below.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // Fast path: clear the poll word directly.
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

  __kmp_itt_lock_releasing(lck);

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_nest_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_nest_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // Fast path: decrement the nesting depth and clear the poll word when the
    // outermost level is released.
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_nest_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_nest_lock_prev
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_nest_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_nest_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
/* Try to acquire the lock; returns FTN_TRUE on success, FTN_FALSE otherwise. */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

#endif // KMP_USE_DYNAMIC_LOCK
}
/* Try to acquire the nestable lock; returns the nesting depth on success. */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

#endif // KMP_USE_DYNAMIC_LOCK
}
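
/* Usage sketch (illustrative only, not part of this translation unit): a
   hedged example of omp_test_lock and the nestable-lock API whose compiler
   entry points (__kmpc_test_lock, __kmpc_*_nest_lock) appear above. The
   function name example_test_and_nest_locks is hypothetical. */
#if 0
#include <omp.h>

static void example_test_and_nest_locks(void) {
  omp_lock_t plain;
  omp_nest_lock_t nested;
  omp_init_lock(&plain);
  omp_init_nest_lock(&nested);

  // omp_test_lock returns nonzero only if the lock was acquired.
  if (omp_test_lock(&plain)) {
    /* ... critical work ... */
    omp_unset_lock(&plain);
  }

  // A nestable lock may be re-acquired by its owner; every set must be
  // matched by an unset.
  omp_set_nest_lock(&nested);
  omp_set_nest_lock(&nested);
  omp_unset_nest_lock(&nested);
  omp_unset_nest_lock(&nested);

  omp_destroy_nest_lock(&nested);
  omp_destroy_lock(&plain);
}
#endif
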
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
// Used in a critical-section reduce block: acquire the internal lock.
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check whether the lock object is already initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch on direct vs. indirect lock for the actual set operation.
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // The fast reduction code is only emitted with 32-byte critical sections;
  // if there is not enough space, fall back to a dynamically allocated lock.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
// Used in a critical-section reduce block: release the internal lock.
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // If the lock did not fit into the 32-byte critical section, a pointer to
  // the real lock object was stored there instead.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Swap teams temporarily when reducing at the teams-construct level.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // tid must be 0 here
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore the primary thread's partition after a teams-level reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  __kmp_type_convert(task_state, &(th->th.th_task_state));
}
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                               kmp_int32 num_vars, size_t reduce_size,
                               void *reduce_data,
                               void (*reduce_func)(void *lhs_data,
                                                   void *rhs_data),
                               kmp_critical_name *lck) {
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // Make sure the runtime is initialized and not paused; a reduction cannot
  // be used as a stand-alone directive.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  // The reduction method is kept in a thread-local variable so that
  // __kmpc_end_reduce_nowait() can reuse it on the same construct.
  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // Team size is 1: no synchronization is required.
    OMPT_REDUCTION_BEGIN;
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // All threads pop here because the generated code will not call
    // __kmpc_end_reduce_nowait() for the atomic method.
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// This barrier is internal; it is neither a terminating barrier nor user code.
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // All workers except the primary thread pop here; they will not reach
    // __kmpc_end_reduce_nowait().
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    KMP_ASSERT(0); // unexpected method
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;
  } else if (packed_reduction_method == empty_reduce_block) {
    // Team size is 1: no synchronization is required.
    OMPT_REDUCTION_END;
  } else if (packed_reduction_method == atomic_reduce_block) {
    // The generated code never calls __kmpc_end_reduce_nowait() for the
    // atomic method.
  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {
    // Only the primary thread gets here; the tree reduction was completed in
    // the barrier.
  } else {
    KMP_ASSERT(0); // unexpected method
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
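
/* Usage sketch (illustrative only, not part of this translation unit): a
   hedged, source-level example of the kind of reduction that compilers
   targeting this runtime typically lower to the __kmpc_reduce_nowait /
   __kmpc_end_reduce_nowait pair above; the exact lowering is
   compiler-specific, and the helper name example_sum is hypothetical. */
#if 0
#include <omp.h>

static double example_sum(const double *a, int n) {
  double sum = 0.0;
#pragma omp parallel
  {
    // The nowait clause suppresses the terminating barrier of the worksharing
    // loop; the reduction itself is still combined by the runtime.
#pragma omp for reduction(+ : sum) nowait
    for (int i = 0; i < n; ++i)
      sum += a[i];
  }
  return sum;
}
#endif
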
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // Make sure the runtime is initialized and not paused.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // Team size is 1: no synchronization is required.
    OMPT_REDUCTION_BEGIN;
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// This barrier is internal; it is neither a terminating barrier nor user code.
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // All workers except the primary thread pop here; only the primary thread
    // reaches __kmpc_end_reduce().
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: primary thread
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    KMP_ASSERT(0); // unexpected method
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // The terminating barrier below is visible to the user and to profiling
  // tools (NOWAIT was not specified).
  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {
    // Team size is 1: no reduction synchronization, but the construct still
    // ends with a barrier.
    OMPT_REDUCTION_END;

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {
    // Only the primary thread executes here (it was the last to arrive).
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {
    KMP_ASSERT(0); // unexpected method
  }

  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD
kmp_uint64 __kmpc_get_taskid() {
  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;
} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {
  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);
} // __kmpc_get_parent_taskid
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // shared-buffer index for the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into an allocated private buffer.
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is the number of dimensions
  // Also keep the address of num_done so it can be reached later without
  // knowing the buffer index.
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // range of each dimension but dims[0]
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count, starting with the range of dims[0].
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check whether the shared buffer is still occupied by another loop.
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to become free.
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
  // The first thread gets NULL from the CAS; others see 1 while initialization
  // is in progress, or the allocated pointer afterwards.
#if KMP_32_BIT_ARCH
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // We are the first thread: allocate the array of flags (one bit per
    // iteration).
    size_t size = (size_t)trace_count / 8 + 8; // in bytes
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) { // initialization is in progress
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
  }
  KMP_MB();
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // sanity check
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // private copy so the shared buffer is not
  // touched on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // Calculate the sequential iteration number and check bounds.
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // Calculate the sequential iteration number (as in "wait", but without
  // out-of-bounds checks).
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // We are the last thread; free the shared resources.
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free the buffer for future reuse
  }
  // Free private resources (the buffer index is kept forever).
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
/* OpenMP 5.x memory management routines: thin wrappers over the __kmpc_*
   allocator entry points. */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                        free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
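
/* Usage sketch (illustrative only, not part of this translation unit): a
   hedged example of the user-level allocator API routed through the wrappers
   above, using the predefined omp_default_mem_alloc allocator. The function
   name example_allocators is hypothetical. */
#if 0
#include <omp.h>
#include <string.h>

static void example_allocators(size_t n) {
  // Allocate, use, and free memory through an OpenMP allocator handle.
  double *buf = (double *)omp_alloc(n * sizeof(double), omp_default_mem_alloc);
  if (buf) {
    memset(buf, 0, n * sizeof(double));
    omp_free(buf, omp_default_mem_alloc);
  }
}
#endif
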
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // cannot pause if the runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
void __kmpc_error(ident_t *loc, int severity, const char *message) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
    ompt_callbacks.ompt_callback(ompt_callback_error)(
        (ompt_severity_t)severity, message, KMP_STRLEN(message),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT

  char *src_loc;
  if (loc && loc->psource) {
    kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
    src_loc =
        __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
    __kmp_str_loc_free(&str_loc);
  } else {
    src_loc = __kmp_str_format("unknown");
  }

  if (severity == severity_warning)
    KMP_WARNING(UserDirectedWarning, src_loc, message);
  else
    KMP_FATAL(UserDirectedError, src_loc, message);

  __kmp_str_free(&src_loc);
}
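
/* Usage sketch (illustrative only, not part of this translation unit): a
   hedged example of the OpenMP 5.1 `error` directive, which compilers
   targeting this runtime typically lower to __kmpc_error when at(execution)
   is requested. The function name example_error_directive is hypothetical. */
#if 0
static void example_error_directive(int bad_input) {
  if (bad_input) {
    // Emits a runtime diagnostic without terminating the program.
#pragma omp error at(execution) severity(warning) message("bad input; continuing")
  }
}
#endif
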
// Mark the beginning of a scope directive.
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
// reserved is for future extension of the scope directive and is not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_begin,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

// Mark the end of a scope directive.
void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
// reserved is for future extension of the scope directive and is not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}
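
/* Usage sketch (illustrative only, not part of this translation unit): a
   hedged, source-level example of the OpenMP 5.1 `scope` construct whose
   begin/end events the entry points above report to OMPT; whether a given
   compiler emits __kmpc_scope/__kmpc_end_scope for it is compiler-specific.
   The function name example_scope is hypothetical. */
#if 0
static double example_scope(void) {
  double sum = 0.0;
#pragma omp parallel
  {
#pragma omp scope reduction(+ : sum)
    {
      sum += 1.0; // each thread contributes; combined at the end of the scope
    }
  }
  return sum;
}
#endif
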
#ifdef KMP_USE_VERSION_SYMBOLS
// Undef these from omp.h so they are not translated into their ompc_
// counterparts in the KMP_VERSION_OMPC_SYMBOL macros below.
#ifdef omp_set_affinity_format
#undef omp_set_affinity_format
#endif
#ifdef omp_get_affinity_format
#undef omp_get_affinity_format
#endif
#ifdef omp_display_affinity
#undef omp_display_affinity
#endif
#ifdef omp_capture_affinity
#undef omp_capture_affinity
#endif
KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
                        "OMP_5.0");
#endif // KMP_USE_VERSION_SYMBOLS