22 #if !defined(__TBB_HardwareConcurrency) 30 #if __TBB_WIN8UI_SUPPORT 36 #include <sys/sysinfo.h> 41 #include <sys/sysinfo.h> 45 #include <sys/param.h> 46 #include <sys/cpuset.h> 53 #if __TBB_USE_OS_AFFINITY_SYSCALL 57 static int (*libiomp_try_restoring_original_mask)();
60 DLD_NOWEAK( kmp_set_thread_affinity_mask_initial, libiomp_try_restoring_original_mask )
64 static void set_thread_affinity_mask(
size_t maskSize,
const basic_mask_t* threadMask ) {
66 if( sched_setaffinity( 0, maskSize, threadMask ) )
68 if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
73 static void get_thread_affinity_mask(
size_t maskSize, basic_mask_t* threadMask ) {
75 if( sched_getaffinity( 0, maskSize, threadMask ) )
77 if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
82 static basic_mask_t* process_mask;
87 delete [] process_mask;
91 #define curMaskSize sizeof(basic_mask_t) * num_masks 92 affinity_helper::~affinity_helper() {
95 set_thread_affinity_mask( curMaskSize, threadMask );
101 if( threadMask == NULL && num_masks ) {
102 threadMask =
new basic_mask_t [num_masks];
103 memset( threadMask, 0, curMaskSize );
104 get_thread_affinity_mask( curMaskSize, threadMask );
105 if( restore_process_mask ) {
106 __TBB_ASSERT( process_mask,
"A process mask is requested but not yet stored" );
107 is_changed = memcmp( process_mask, threadMask, curMaskSize );
109 set_thread_affinity_mask( curMaskSize, process_mask );
118 delete [] threadMask;
125 static atomic<do_once_state> hardware_concurrency_info;
127 static int theNumProcs;
129 static void initialize_hardware_concurrency_info () {
131 int availableProcs = 0;
134 #if __TBB_MAIN_THREAD_AFFINITY_BROKEN 135 int maxProcs = INT_MAX;
138 int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
142 int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
144 basic_mask_t* processMask;
145 const size_t BasicMaskSize =
sizeof(basic_mask_t);
147 const int curMaskSize = BasicMaskSize * numMasks;
148 processMask =
new basic_mask_t[numMasks];
149 memset( processMask, 0, curMaskSize );
151 err = sched_getaffinity( pid, curMaskSize, processMask );
152 if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 )
156 #if __TBB_MAIN_THREAD_AFFINITY_BROKEN 157 err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, curMaskSize, processMask );
159 err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask );
161 if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 )
164 delete[] processMask;
169 num_masks = numMasks;
176 affinity_helper affhelp;
177 affhelp.protect_affinity_mask(
false );
178 if ( libiomp_try_restoring_original_mask()==0 ) {
180 const int curMaskSize = BasicMaskSize * numMasks;
181 memset( processMask, 0, curMaskSize );
182 get_thread_affinity_mask( curMaskSize, processMask );
189 for (
int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) {
190 for (
size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) {
191 if ( CPU_ISSET( i, processMask + m ) )
195 process_mask = processMask;
199 availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs;
200 delete[] processMask;
202 theNumProcs = availableProcs > 0 ? availableProcs : 1;
203 __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL );
207 atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
217 FILE *fp = fopen(
"/sys/devices/system/cpu/present",
"r");
218 if (fp == NULL)
return 1;
219 int num_args, lower, upper, num_cpus=0;
220 while ((num_args = fscanf(fp,
"%u-%u", &lower, &upper)) != EOF) {
222 case 2: num_cpus += upper - lower + 1;
break;
223 case 1: num_cpus += 1;
break;
227 return (num_cpus > 0) ? num_cpus : 1;
230 #elif defined(_SC_NPROCESSORS_ONLN) 233 int n = sysconf(_SC_NPROCESSORS_ONLN);
234 return (n > 0) ? n : 1;
239 static atomic<do_once_state> hardware_concurrency_info;
241 static const WORD TBB_ALL_PROCESSOR_GROUPS = 0xffff;
245 static const WORD MaxProcessorGroups = 64;
247 struct ProcessorGroupInfo {
250 int numProcsRunningTotal;
253 static int NumGroups;
262 static int HoleIndex;
265 int ProcessorGroupInfo::NumGroups = 1;
266 int ProcessorGroupInfo::HoleIndex = 0;
268 ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups];
270 struct TBB_GROUP_AFFINITY {
276 static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = NULL;
277 static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = NULL;
278 static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread,
279 const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff );
280 static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* );
283 DLD(GetActiveProcessorCount, TBB_GetActiveProcessorCount)
284 ,
DLD(GetActiveProcessorGroupCount, TBB_GetActiveProcessorGroupCount)
285 ,
DLD(SetThreadGroupAffinity, TBB_SetThreadGroupAffinity)
286 ,
DLD(GetThreadGroupAffinity, TBB_GetThreadGroupAffinity)
289 static void initialize_hardware_concurrency_info () {
290 #if __TBB_WIN8UI_SUPPORT 293 theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency();
295 dynamic_link(
"Kernel32.dll", ProcessorGroupsApiLinkTable,
298 GetNativeSystemInfo(&si);
299 DWORD_PTR pam, sam, m = 1;
300 GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam );
302 for (
size_t i = 0; i <
sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) {
306 __TBB_ASSERT( nproc <= (
int)si.dwNumberOfProcessors, NULL );
308 theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc;
310 if ( nproc == (
int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) {
312 ProcessorGroupInfo::NumGroups = (
int)TBB_GetActiveProcessorGroupCount();
313 __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL );
316 if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups )
317 ProcessorGroupInfo::NumGroups = MaxProcessorGroups;
318 if ( ProcessorGroupInfo::NumGroups > 1 ) {
319 TBB_GROUP_AFFINITY ga;
320 if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) )
321 ProcessorGroupInfo::HoleIndex = ga.Group;
323 for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) {
324 ProcessorGroupInfo &pgi = theProcessorGroups[i];
325 pgi.numProcs = (
int)TBB_GetActiveProcessorCount(i);
326 __TBB_ASSERT( pgi.numProcs <= (
int)
sizeof(DWORD_PTR) * CHAR_BIT, NULL );
327 pgi.mask = pgi.numProcs ==
sizeof(DWORD_PTR) * CHAR_BIT ? ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1;
328 pgi.numProcsRunningTotal = nprocs += pgi.numProcs;
330 __TBB_ASSERT( nprocs == (
int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL );
336 if (ProcessorGroupInfo::NumGroups>1)
337 for (
int i=0; i<ProcessorGroupInfo::NumGroups; ++i)
341 int NumberOfProcessorGroups() {
343 return ProcessorGroupInfo::NumGroups;
347 #define HoleAdjusted(procIdx, grpIdx) (procIdx + (holeIdx <= grpIdx)) 349 int FindProcessorGroupIndex (
int procIdx ) {
352 const int numProcs = theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
353 if ( procIdx >= numProcs - 1 ) {
355 procIdx = (procIdx - numProcs + 1) % numProcs;
358 holeIdx = ProcessorGroupInfo::HoleIndex;
361 int i = procIdx / theProcessorGroups[0].numProcs;
363 if (i >= ProcessorGroupInfo::NumGroups) i = ProcessorGroupInfo::NumGroups-1;
365 if ( theProcessorGroups[i].numProcsRunningTotal > HoleAdjusted(procIdx, i) ) {
366 while ( theProcessorGroups[i].numProcsRunningTotal - theProcessorGroups[i].numProcs > HoleAdjusted(procIdx, i) ) {
374 }
while ( theProcessorGroups[i].numProcsRunningTotal <= HoleAdjusted(procIdx, i) );
376 __TBB_ASSERT( i < ProcessorGroupInfo::NumGroups, NULL );
380 void MoveThreadIntoProcessorGroup(
void* hThread,
int groupIndex ) {
382 if ( !TBB_SetThreadGroupAffinity )
384 TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} };
385 TBB_SetThreadGroupAffinity( hThread, &ga, NULL );
389 atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
390 return theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
395 #error AvailableHwConcurrency is not implemented for this OS void dynamic_unlink(dynamic_link_handle)
int AvailableHwConcurrency()
Returns maximal parallelism level supported by the current OS configuration.
Association between a handler name and location of pointer to it.
const int DYNAMIC_LINK_GLOBAL
void * dynamic_link_handle
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d int
void atomic_do_once(const F &initializer, atomic< do_once_state > &state)
One-time initialization function.
void protect_affinity_mask(bool)
void destroy_process_mask()
OPEN_INTERNAL_NAMESPACE bool dynamic_link(const char *, const dynamic_link_descriptor *, size_t, dynamic_link_handle *handle, int)
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int mask
#define DLD(s, h)
The helper to construct dynamic_link_descriptor structure.
void __TBB_EXPORTED_FUNC runtime_warning(const char *format,...)
Report a runtime warning.
void PrintExtraVersionInfo(const char *category, const char *format,...)
Prints arbitrary extra TBB version information on stderr.