/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
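    // Illustrative note: for chunked schedules the compiler typically wraps
    // the returned bounds in "for (lb = *plower; lb <= *pupper; lb +=
    // *pstride)"; returning the whole span as the stride therefore makes that
    // outer chunk loop execute exactly once in the serialized case.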

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
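  // Worked example (illustrative): for *plower = 0, *pupper = 99 and incr = 7,
  // trip_count = (99 - 0) / 7 + 1 = 15. The cast to UT matters because the
  // difference (upper - lower) can overflow the signed type T.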

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
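  // Worked example (illustrative): trip_count = 10, nth = 4, incr = 1,
  // *plower = 0. Balanced scheduling gives small_chunk = 2, extras = 2, so
  // threads 0..3 receive [0,2], [3,5], [6,7], [8,9]. Greedy scheduling uses
  // big_chunk_inc_count = 3 and gives [0,2], [3,5], [6,8], [9,9], with the
  // last thread's raw upper bound (11) clipped back to old_upper.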
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
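  // Worked example (illustrative): trip_count = 100, chunk = 16, nth = 4,
  // incr = 1, *plower = 0. Then nchunks = 7, span = 16 and *pstride = 64;
  // thread 0 starts at [0,15] and strides to [64,79]. The last chunk has
  // index 6, so *plastiter is set for tid == 6 % 4 == 2.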
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);
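    // Note (illustrative): the mask trick rounds span up to a multiple of
    // chunk and assumes chunk is a power of two, e.g. span = 27, chunk = 8
    // gives (27 + 7) & ~7 = 32.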

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
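
// Worked example (illustrative) of the two-level split above: 100 iterations,
// nteams = 4, nth = 8, incr = 1, balanced scheduling. Each team gets 25
// iterations (team 1: [25,49], so *pupperDist = 49); within team 1 the 25
// iterations split across 8 threads as chunks of 4 or 3 (thread 0: [25,28],
// thread 7: [47,49]).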

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
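  // Worked example (illustrative): trip_count = 100, chunk = 16, nteams = 4,
  // incr = 1, lower = 0. Team 0 gets [0,15] with *p_st = 64; the last chunk
  // has index (100 - 1) / 16 = 6, so *p_last is set for team 6 % 4 == 2.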
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {

void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
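
// A minimal sketch (not part of the runtime) of how a compiler typically
// lowers "#pragma omp for schedule(static)" onto this entry point; "loc",
// "gtid" and "body" stand for values the outlined parallel region already
// has:
//
//   kmp_int32 last = 0, lb = 0, ub = 99, st = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last,
//                            &lb, &ub, &st, /*incr=*/1, /*chunk=*/0);
//   for (kmp_int32 i = lb; i <= ub; ++i)
//     body(i);
//   __kmpc_for_static_fini(&loc, gtid);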

void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

} // extern "C"
Definition: kmp.h:236