/*
 * kmp_barrier.h
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_BARRIER_H
#define KMP_BARRIER_H

#include "kmp.h"
#include "kmp_i18n.h"

// Aligned allocation: pick the strongest primitive the platform provides,
// falling back to the default allocator (which does not honor the requested
// alignment).
#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
#include <xmmintrin.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
#elif KMP_HAVE_ALIGNED_ALLOC
#define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size)
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE_POSIX_MEMALIGN
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  void *ptr;
  int n = posix_memalign(&ptr, alignment, size);
  if (n != 0) {
    if (ptr)
      free(ptr);
    return nullptr;
  }
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE__ALIGNED_MALLOC
#include <malloc.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
#endif
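
// Illustrative note (not part of the runtime API): whichever branch above was
// selected, an allocation must be released with the matching macro, e.g.
//
//   void *p = KMP_ALIGNED_ALLOCATE(bytes, 4 * CACHE_LINE);
//   if (!p) { /* handle allocation failure */ }
//   ...
//   KMP_ALIGNED_FREE(p);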

// Use four cache lines: the mid-level cache (MLC) tends to prefetch the next
// or previous cache line, which can create a false conflict between cores, so
// padding to four lines is the only way to guarantee that no such prefetch
// touches another core's data.
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif
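
// For example, with the usual 64-byte CACHE_LINE this expands to
// KMP_ALIGN(4 * 64), i.e. a 256-byte alignment request, so each padded flag
// below starts on its own group of four cache lines.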

#define KMP_OPTIMIZE_FOR_REDUCTIONS 0

class distributedBarrier {
  // Per-slot barrier state. Each field is padded to four cache lines
  // (KMP_FOURLINE_ALIGN_CACHE) so adjacent-line hardware prefetch cannot
  // cause false sharing between cores.
  struct flags_s {
    kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
  };

  struct go_s {
    std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
  };

  struct iter_s {
    kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
  };

  struct sleep_s {
    std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
  };

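  // Rough lifecycle sketch (an assumption drawn from the field names; the
  // actual gather/release algorithm lives in kmp_barrier.cpp): an arriving
  // thread publishes its arrival through a flags_s::stillNeed slot for the
  // current iteration, then spins on (or sleeps waiting for, see sleep_s) a
  // go_s::go value; the releasing thread advances go to wake the waiters, and
  // iter_s::iter selects which set of flags the next barrier instance uses.
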
  // Configure the barrier for nthr threads.
  void init(size_t nthr);
  // Grow the backing arrays so the barrier can hold at least nthr threads.
  void resize(size_t nthr);
  // Compute the number of go signals and groups once, from the base team size.
  void computeGo(size_t n);
  // Derive the per-size distribution (threads per go signal, groups, etc.).
  void computeVarsForN(size_t n);

public:
  enum {
    MAX_ITERS = 3,
    MAX_GOS = 8,
    IDEAL_GOS = 4,
    IDEAL_CONTENTION = 16,
  };

  flags_s *flags[MAX_ITERS]; // arrival flags, one array per in-flight iteration
  go_s *go; // go (release) signals
  iter_s *iter; // barrier iteration counters
  sleep_s *sleep; // sleep flags for threads waiting in the barrier

  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
  // number of go signals, each requiring one write per iteration
  size_t KMP_ALIGN_CACHE num_gos;
  // number of groups of gos
  size_t KMP_ALIGN_CACHE num_groups;
  // threads per go signal
  size_t KMP_ALIGN_CACHE threads_per_go;
  // true if threads_per_go is fixed rather than recomputed
  bool KMP_ALIGN_CACHE fix_threads_per_go;
  // threads per group
  size_t KMP_ALIGN_CACHE threads_per_group;
  // number of go signals in a group
  size_t KMP_ALIGN_CACHE gos_per_group;
  void *team_icvs; // storage for the team's internal control variables (ICVs)
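
  // Layout sketch (an illustrative reading of the fields above, not asserted
  // by this header): threads are split into num_groups groups of
  // threads_per_group threads; each group owns gos_per_group go signals, and
  // each go signal is shared by threads_per_go threads, so roughly
  //   num_gos           ~= num_groups * gos_per_group
  //   threads_per_group ~= threads_per_go * gos_per_group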

  // Objects are created through allocate()/deallocate() below (never the
  // constructor), so the storage gets the required 4-cache-line alignment.
  distributedBarrier() = delete;
  ~distributedBarrier() = delete;

  // Used instead of constructor to create aligned data
  static distributedBarrier *allocate(int nThreads) {
    distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
        sizeof(distributedBarrier), 4 * CACHE_LINE);
    if (!d) {
      KMP_FATAL(MemoryAllocFailed);
    }
    d->num_threads = 0;
    d->max_threads = 0;
    for (int i = 0; i < MAX_ITERS; ++i)
      d->flags[i] = NULL;
    d->go = NULL;
    d->iter = NULL;
    d->sleep = NULL;
    d->team_icvs = NULL;
    d->fix_threads_per_go = false;
    // calculate gos and groups ONCE on base size
    d->computeGo(nThreads);
    d->init(nThreads);
    return d;
  }

  static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }

  void update_num_threads(size_t nthr) { init(nthr); }

  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
  size_t get_num_threads() { return num_threads; }
  // Release waiting threads by updating the go signals.
  kmp_uint64 go_release();
  // Reset the go signals so the barrier can be reused.
  void go_reset();
};
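
// Minimal usage sketch (illustrative only; inside the runtime the team
// structure owns and drives this object):
//
//   distributedBarrier *b = distributedBarrier::allocate(nthreads);
//   ...
//   b->update_num_threads(new_nthreads); // re-initialize for a new team size
//   ...
//   distributedBarrier::deallocate(b);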

#endif // KMP_BARRIER_H