Home ⌂Doc Index ◂Up ▴
Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb_misc_ex.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2020 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 // Source file for miscellaneous entities that are infrequently referenced by
18 // an executing program, and implementation of which requires dynamic linking.
19 
20 #include "tbb_misc.h"
21 
22 #if !defined(__TBB_HardwareConcurrency)
23 
24 #include "dynamic_link.h"
25 #include <stdio.h>
26 #include <limits.h>
27 
28 #if _WIN32||_WIN64
30 #if __TBB_WIN8UI_SUPPORT
31 #include <thread>
32 #endif
33 #else
34 #include <unistd.h>
35 #if __linux__
36 #include <sys/sysinfo.h>
37 #include <string.h>
38 #include <sched.h>
39 #include <errno.h>
40 #elif __sun
41 #include <sys/sysinfo.h>
42 #elif __FreeBSD__
43 #include <errno.h>
44 #include <string.h>
45 #include <sys/param.h> // Required by <sys/cpuset.h>
46 #include <sys/cpuset.h>
47 #endif
48 #endif
49 
50 namespace tbb {
51 namespace internal {
52 
#if __TBB_USE_OS_AFFINITY_SYSCALL

#if __linux__
// Handlers for interoperation with libiomp
// Entry point exported by libiomp5 (resolved at runtime) that restores the
// thread affinity mask libiomp captured at its own startup; returns 0 on success.
static int (*libiomp_try_restoring_original_mask)();
// Table for mapping to libiomp entry points
// (symbol name in libiomp5.so -> local function pointer above).
static const dynamic_link_descriptor iompLinkTable[] = {
    DLD_NOWEAK( kmp_set_thread_affinity_mask_initial, libiomp_try_restoring_original_mask )
};
#endif
63 
// Applies the given affinity mask to the calling thread via the OS-specific
// syscall. maskSize is the size of the mask buffer in bytes. Failure is not
// fatal: only a runtime warning is emitted.
static void set_thread_affinity_mask( size_t maskSize, const basic_mask_t* threadMask ) {
#if __linux__
    // pid 0 == the calling thread.
    if( sched_setaffinity( 0, maskSize, threadMask ) )
#else /* FreeBSD */
    // id -1 with CPU_WHICH_TID == the calling thread.
    if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
#endif
        runtime_warning( "setaffinity syscall failed" );
}
72 
// Reads the calling thread's current affinity mask into threadMask
// (maskSize bytes). Failure is not fatal: only a runtime warning is emitted,
// and threadMask keeps whatever contents it had (callers zero it beforehand).
static void get_thread_affinity_mask( size_t maskSize, basic_mask_t* threadMask ) {
#if __linux__
    // pid 0 == the calling thread.
    if( sched_getaffinity( 0, maskSize, threadMask ) )
#else /* FreeBSD */
    // id -1 with CPU_WHICH_TID == the calling thread.
    if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
#endif
        runtime_warning( "getaffinity syscall failed" );
}
81 
82 static basic_mask_t* process_mask;
83 static int num_masks;
84 
85 void destroy_process_mask() {
86  if( process_mask ) {
87  delete [] process_mask;
88  }
89 }
90 
91 #define curMaskSize sizeof(basic_mask_t) * num_masks
92 affinity_helper::~affinity_helper() {
93  if( threadMask ) {
94  if( is_changed ) {
95  set_thread_affinity_mask( curMaskSize, threadMask );
96  }
97  delete [] threadMask;
98  }
99 }
// Captures the calling thread's current affinity mask (only on the first call;
// subsequent calls are no-ops) so the destructor can restore it later.
// restore_process_mask == true : if the thread's mask differs from the stored
//     process-wide mask, switch the thread to the process mask now; is_changed
//     records whether the destructor must restore the captured mask.
// restore_process_mask == false: the caller is about to change the mask itself,
//     so unconditionally mark the captured mask for restoration.
void affinity_helper::protect_affinity_mask( bool restore_process_mask ) {
    if( threadMask == NULL && num_masks ) { // TODO: assert num_masks validity?
        threadMask = new basic_mask_t [num_masks];
        memset( threadMask, 0, curMaskSize );
        get_thread_affinity_mask( curMaskSize, threadMask );
        if( restore_process_mask ) {
            __TBB_ASSERT( process_mask, "A process mask is requested but not yet stored" );
            // Nonzero memcmp result means the thread mask diverged from the process mask.
            is_changed = memcmp( process_mask, threadMask, curMaskSize );
            if( is_changed )
                set_thread_affinity_mask( curMaskSize, process_mask );
        } else {
            // Assume that the mask will be changed by the caller.
            is_changed = 1;
        }
    }
}
117  if( threadMask ) {
118  delete [] threadMask;
119  threadMask = NULL;
120  }
121  is_changed = 0;
122 }
123 #undef curMaskSize
124 
// One-time initialization state for initialize_hardware_concurrency_info.
static atomic<do_once_state> hardware_concurrency_info;

// Number of processors available to this process; computed once, >= 1.
static int theNumProcs;
128 
// Discovers the size of the kernel's CPU affinity mask, captures the process
// affinity mask into the process_mask/num_masks globals, and counts the
// processors available to this process into theNumProcs.
// Executed exactly once via atomic_do_once.
static void initialize_hardware_concurrency_info () {
    int err;
    int availableProcs = 0;
    int numMasks = 1;
#if __linux__
#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
    int maxProcs = INT_MAX; // To check the entire mask.
    int pid = 0; // Get the mask of the calling thread.
#else
    int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
    int pid = getpid();
#endif
#else /* FreeBSD >= 7.1 */
    int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
#endif
    basic_mask_t* processMask;
    const size_t BasicMaskSize = sizeof(basic_mask_t);
    // Grow the mask buffer (doubling numMasks each iteration) until the
    // getaffinity syscall accepts the size, fails for a different reason,
    // or a sanity cap on the total bit count is reached.
    for (;;) {
        const int curMaskSize = BasicMaskSize * numMasks;
        processMask = new basic_mask_t[numMasks];
        memset( processMask, 0, curMaskSize );
#if __linux__
        // EINVAL indicates the buffer is smaller than the kernel's mask: retry bigger.
        err = sched_getaffinity( pid, curMaskSize, processMask );
        if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 )
            break;
#else /* FreeBSD >= 7.1 */
        // CPU_LEVEL_WHICH - anonymous (current) mask, CPU_LEVEL_CPUSET - assigned mask
#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
        err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, curMaskSize, processMask );
#else
        err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask );
#endif
        // ERANGE indicates a size mismatch with the kernel's mask: retry bigger.
        if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 )
            break;
#endif /* FreeBSD >= 7.1 */
        delete[] processMask;
        numMasks <<= 1;
    }
    if ( !err ) {
        // We have found the mask size and captured the process affinity mask into processMask.
        num_masks = numMasks; // do here because it's needed for affinity_helper to work
#if __linux__
        // For better coexistence with libiomp which might have changed the mask already,
        // check for its presence and ask it to restore the mask.
        dynamic_link_handle libhandle;
        if ( dynamic_link( "libiomp5.so", iompLinkTable, 1, &libhandle, DYNAMIC_LINK_GLOBAL ) ) {
            // We have found the symbol provided by libiomp5 for restoring original thread affinity.
            affinity_helper affhelp;
            affhelp.protect_affinity_mask( /*restore_process_mask=*/false );
            if ( libiomp_try_restoring_original_mask()==0 ) {
                // Now we have the right mask to capture, restored by libiomp.
                const int curMaskSize = BasicMaskSize * numMasks;
                memset( processMask, 0, curMaskSize );
                get_thread_affinity_mask( curMaskSize, processMask );
            } else
                affhelp.dismiss(); // thread mask has not changed
            dynamic_unlink( libhandle );
            // Destructor of affinity_helper restores the thread mask (unless dismissed).
        }
#endif
        // Count the set bits in the captured mask, capped at maxProcs.
        for ( int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) {
            for ( size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) {
                if ( CPU_ISSET( i, processMask + m ) )
                    ++availableProcs;
            }
        }
        // Publish the captured mask for later use (see destroy_process_mask).
        process_mask = processMask;
    }
    else {
        // Failed to get the process affinity mask; assume the whole machine can be used.
        availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs;
        delete[] processMask;
    }
    theNumProcs = availableProcs > 0 ? availableProcs : 1; // Fail safety strap
    __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL );
}
205 
207  atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
208  return theNumProcs;
209 }
210 
211 /* End of __TBB_USE_OS_AFFINITY_SYSCALL implementation */
212 #elif __ANDROID__
213 
214 // Work-around for Android that reads the correct number of available CPUs since system calls are unreliable.
215 // Format of "present" file is: ([<int>-<int>|<int>],)+
217  FILE *fp = fopen("/sys/devices/system/cpu/present", "r");
218  if (fp == NULL) return 1;
219  int num_args, lower, upper, num_cpus=0;
220  while ((num_args = fscanf(fp, "%u-%u", &lower, &upper)) != EOF) {
221  switch(num_args) {
222  case 2: num_cpus += upper - lower + 1; break;
223  case 1: num_cpus += 1; break;
224  }
225  fscanf(fp, ",");
226  }
227  return (num_cpus > 0) ? num_cpus : 1;
228 }
229 
230 #elif defined(_SC_NPROCESSORS_ONLN)
231 
233  int n = sysconf(_SC_NPROCESSORS_ONLN);
234  return (n > 0) ? n : 1;
235 }
236 
237 #elif _WIN32||_WIN64
238 
// One-time initialization state for initialize_hardware_concurrency_info.
static atomic<do_once_state> hardware_concurrency_info;

// Pseudo group index accepted by GetActiveProcessorCount to request the
// processor count summed over all groups (see the assert in initialization).
static const WORD TBB_ALL_PROCESSOR_GROUPS = 0xffff;

// Statically allocate an array for processor group information.
// Windows 7 supports maximum 4 groups, but let's look ahead a little.
static const WORD MaxProcessorGroups = 64;
246 
// Per-group processor bookkeeping, filled in by initialize_hardware_concurrency_info.
struct ProcessorGroupInfo {
    DWORD_PTR mask;           // Affinity mask covering the processors of this group
    int numProcs;             // Number of processors in this group
    int numProcsRunningTotal; // Cumulative processor count over groups [0..this one]

    // Number of processor groups detected on the machine (at least 1).
    static int NumGroups;

    // Index of the group the master thread was observed in at initialization;
    // FindProcessorGroupIndex reserves one worker slot in that group for the
    // first master thread.
    static int HoleIndex;
};

int ProcessorGroupInfo::NumGroups = 1;
int ProcessorGroupInfo::HoleIndex = 0;

// Group descriptor table; only the first NumGroups entries are meaningful.
ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups];
269 
// Local mirror of the Windows GROUP_AFFINITY structure, declared here so the
// dynamically resolved Get/SetThreadGroupAffinity entry points can be called
// without requiring their declarations at build time. The layout must match
// the OS definition exactly, since pointers to it are passed to those APIs.
struct TBB_GROUP_AFFINITY {
    DWORD_PTR Mask;
    WORD Group;
    WORD Reserved[3];
};
275 
// Pointers to the processor-group API entry points, resolved at runtime from
// Kernel32.dll via dynamic_link. Callers check TBB_GetActiveProcessorCount /
// TBB_SetThreadGroupAffinity for NULL before use, so running on an OS without
// these exports degrades to single-group behavior.
static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = NULL;
static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = NULL;
static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread,
                        const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff );
static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* );

// Name-to-pointer table consumed by dynamic_link to resolve the entries above.
static const dynamic_link_descriptor ProcessorGroupsApiLinkTable[] = {
      DLD(GetActiveProcessorCount, TBB_GetActiveProcessorCount)
    , DLD(GetActiveProcessorGroupCount, TBB_GetActiveProcessorGroupCount)
    , DLD(SetThreadGroupAffinity, TBB_SetThreadGroupAffinity)
    , DLD(GetThreadGroupAffinity, TBB_GetThreadGroupAffinity)
};
288 
// Detects processor groups and per-group processor counts, populating
// theProcessorGroups[] and the ProcessorGroupInfo statics.
// Executed exactly once via atomic_do_once.
static void initialize_hardware_concurrency_info () {
#if __TBB_WIN8UI_SUPPORT
    // For these applications processor groups info is unavailable
    // Setting up a number of processors for one processor group
    theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency();
#else /* __TBB_WIN8UI_SUPPORT */
    dynamic_link( "Kernel32.dll", ProcessorGroupsApiLinkTable,
                  sizeof(ProcessorGroupsApiLinkTable)/sizeof(dynamic_link_descriptor) );
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    DWORD_PTR pam, sam, m = 1;
    GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam );
    // Count the processors permitted by the process affinity mask.
    int nproc = 0;
    for ( size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) {
        if ( pam & m )
            ++nproc;
    }
    __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, NULL );
    // By default setting up a number of processors for one processor group
    theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc;
    // Setting up processor groups in case the process does not restrict affinity mask and more than one processor group is present
    if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) {
        // The process does not have restricting affinity mask and multiple processor groups are possible
        ProcessorGroupInfo::NumGroups = (int)TBB_GetActiveProcessorGroupCount();
        __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL );
        // Fail safety bootstrap. Release versions will limit available concurrency
        // level, while debug ones would assert.
        if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups )
            ProcessorGroupInfo::NumGroups = MaxProcessorGroups;
        if ( ProcessorGroupInfo::NumGroups > 1 ) {
            TBB_GROUP_AFFINITY ga;
            // Record which group the current (master) thread runs in.
            if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) )
                ProcessorGroupInfo::HoleIndex = ga.Group;
            int nprocs = 0;
            // Fill per-group processor counts and cumulative running totals.
            for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) {
                ProcessorGroupInfo &pgi = theProcessorGroups[i];
                pgi.numProcs = (int)TBB_GetActiveProcessorCount(i);
                __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, NULL );
                // All-ones mask for a full group; the ternary avoids shifting
                // by the full width of DWORD_PTR, which would be undefined.
                pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ? ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1;
                pgi.numProcsRunningTotal = nprocs += pgi.numProcs;
            }
            __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL );
        }
    }
#endif /* __TBB_WIN8UI_SUPPORT */

    PrintExtraVersionInfo("Processor groups", "%d", ProcessorGroupInfo::NumGroups);
    if (ProcessorGroupInfo::NumGroups>1)
        for (int i=0; i<ProcessorGroupInfo::NumGroups; ++i)
            PrintExtraVersionInfo( "----- Group", "%d: size %d", i, theProcessorGroups[i].numProcs);
}
340 
// Returns the number of processor groups detected at initialization.
// Valid only after AvailableHwConcurrency has completed initialization.
int NumberOfProcessorGroups() {
    __TBB_ASSERT( hardware_concurrency_info == initialization_complete, "NumberOfProcessorGroups is used before AvailableHwConcurrency" );
    return ProcessorGroupInfo::NumGroups;
}
345 
346 // Offset for the slot reserved for the first master thread
347 #define HoleAdjusted(procIdx, grpIdx) (procIdx + (holeIdx <= grpIdx))
348 
// Maps a worker index to a processor group index, distributing workers across
// groups proportionally to group sizes. One slot in group holeIdx is reserved
// for the first master thread (the HoleAdjusted macro accounts for it).
int FindProcessorGroupIndex ( int procIdx ) {
    // In case of oversubscription spread extra workers in a round robin manner
    int holeIdx;
    const int numProcs = theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
    if ( procIdx >= numProcs - 1 ) {
        // Oversubscribed: wrap the index around and ignore the reserved slot.
        holeIdx = INT_MAX;
        procIdx = (procIdx - numProcs + 1) % numProcs;
    }
    else
        holeIdx = ProcessorGroupInfo::HoleIndex;
    __TBB_ASSERT( hardware_concurrency_info == initialization_complete, "FindProcessorGroupIndex is used before AvailableHwConcurrency" );
    // Approximate the likely group index assuming all groups are of the same size
    int i = procIdx / theProcessorGroups[0].numProcs;
    // Make sure the approximation is a valid group index
    if (i >= ProcessorGroupInfo::NumGroups) i = ProcessorGroupInfo::NumGroups-1;
    // Now adjust the approximation up or down
    if ( theProcessorGroups[i].numProcsRunningTotal > HoleAdjusted(procIdx, i) ) {
        // Walk down while the preceding groups already cover procIdx.
        while ( theProcessorGroups[i].numProcsRunningTotal - theProcessorGroups[i].numProcs > HoleAdjusted(procIdx, i) ) {
            __TBB_ASSERT( i > 0, NULL );
            --i;
        }
    }
    else {
        // Walk up until the cumulative total covers procIdx.
        do {
            ++i;
        } while ( theProcessorGroups[i].numProcsRunningTotal <= HoleAdjusted(procIdx, i) );
    }
    __TBB_ASSERT( i < ProcessorGroupInfo::NumGroups, NULL );
    return i;
}
379 
380 void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ) {
381  __TBB_ASSERT( hardware_concurrency_info == initialization_complete, "MoveThreadIntoProcessorGroup is used before AvailableHwConcurrency" );
382  if ( !TBB_SetThreadGroupAffinity )
383  return;
384  TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} };
385  TBB_SetThreadGroupAffinity( hThread, &ga, NULL );
386 }
387 
389  atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
390  return theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
391 }
392 
393 /* End of _WIN32||_WIN64 implementation */
394 #else
395  #error AvailableHwConcurrency is not implemented for this OS
396 #endif
397 
398 } // namespace internal
399 } // namespace tbb
400 
401 #endif /* !__TBB_HardwareConcurrency */
void dynamic_unlink(dynamic_link_handle)
int AvailableHwConcurrency()
Returns maximal parallelism level supported by the current OS configuration.
Association between a handler name and location of pointer to it.
Definition: dynamic_link.h:60
const int DYNAMIC_LINK_GLOBAL
Definition: dynamic_link.h:77
void * dynamic_link_handle
Definition: dynamic_link.h:74
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d int
void atomic_do_once(const F &initializer, atomic< do_once_state > &state)
One-time initialization function.
Definition: tbb_misc.h:213
void destroy_process_mask()
Definition: tbb_misc.h:266
#define DLD_NOWEAK(s, h)
Definition: dynamic_link.h:57
OPEN_INTERNAL_NAMESPACE bool dynamic_link(const char *, const dynamic_link_descriptor *, size_t, dynamic_link_handle *handle, int)
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int mask
#define DLD(s, h)
The helper to construct dynamic_link_descriptor structure.
Definition: dynamic_link.h:56
The graph class.
void __TBB_EXPORTED_FUNC runtime_warning(const char *format,...)
Report a runtime warning.
void PrintExtraVersionInfo(const char *category, const char *format,...)
Prints arbitrary extra TBB version information on stderr.
Definition: tbb_misc.cpp:211

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.