22 #if !defined(__TBB_HardwareConcurrency)    30 #if __TBB_WIN8UI_SUPPORT    36 #include <sys/sysinfo.h>    41 #include <sys/sysinfo.h>    45 #include <sys/param.h>      46 #include <sys/cpuset.h>    53 #if __TBB_USE_OS_AFFINITY_SYSCALL    57 static int (*libiomp_try_restoring_original_mask)();
    60     DLD_NOWEAK( kmp_set_thread_affinity_mask_initial, libiomp_try_restoring_original_mask )
    64 static void set_thread_affinity_mask( 
size_t maskSize, 
const basic_mask_t* threadMask ) {
    66     if( sched_setaffinity( 0, maskSize, threadMask ) )
    68     if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
    73 static void get_thread_affinity_mask( 
size_t maskSize, basic_mask_t* threadMask ) {
    75     if( sched_getaffinity( 0, maskSize, threadMask ) )
    77     if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
    82 static basic_mask_t* process_mask;
    87         delete [] process_mask;
    91 #define curMaskSize sizeof(basic_mask_t) * num_masks    92 affinity_helper::~affinity_helper() {
    95             set_thread_affinity_mask( curMaskSize, threadMask );
   101     if( threadMask == NULL && num_masks ) { 
   102         threadMask = 
new basic_mask_t [num_masks];
   103         memset( threadMask, 0, curMaskSize );
   104         get_thread_affinity_mask( curMaskSize, threadMask );
   105         if( restore_process_mask ) {
   106             __TBB_ASSERT( process_mask, 
"A process mask is requested but not yet stored" );
   107             is_changed = memcmp( process_mask, threadMask, curMaskSize );
   109                 set_thread_affinity_mask( curMaskSize, process_mask );
   118         delete [] threadMask;
   125 static atomic<do_once_state> hardware_concurrency_info;
   127 static int theNumProcs;
   129 static void initialize_hardware_concurrency_info () {
   131     int availableProcs = 0;
   134 #if __TBB_MAIN_THREAD_AFFINITY_BROKEN   135     int maxProcs = INT_MAX; 
   138     int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
   142     int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
   144     basic_mask_t* processMask;
   145     const size_t BasicMaskSize =  
sizeof(basic_mask_t);
   147         const int curMaskSize = BasicMaskSize * numMasks;
   148         processMask = 
new basic_mask_t[numMasks];
   149         memset( processMask, 0, curMaskSize );
   151         err = sched_getaffinity( pid, curMaskSize, processMask );
   152         if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 )
   156 #if __TBB_MAIN_THREAD_AFFINITY_BROKEN   157         err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, curMaskSize, processMask );
   159         err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask );
   161         if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 )
   164         delete[] processMask;
   169         num_masks = numMasks; 
   176             affinity_helper affhelp;
   177             affhelp.protect_affinity_mask( 
false );
   178             if ( libiomp_try_restoring_original_mask()==0 ) {
   180                 const int curMaskSize = BasicMaskSize * numMasks;
   181                 memset( processMask, 0, curMaskSize );
   182                 get_thread_affinity_mask( curMaskSize, processMask );
   189         for ( 
int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) {
   190             for ( 
size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) {
   191                 if ( CPU_ISSET( i, processMask + m ) )
   195         process_mask = processMask;
   199         availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs;
   200         delete[] processMask;
   202     theNumProcs = availableProcs > 0 ? availableProcs : 1; 
   203     __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL );
   207     atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
   217     FILE *fp = fopen(
"/sys/devices/system/cpu/present", 
"r");
   218     if (fp == NULL) 
return 1;
   219     int num_args, lower, upper, num_cpus=0;
   220     while ((num_args = fscanf(fp, 
"%u-%u", &lower, &upper)) != EOF) {
   222             case 2: num_cpus += upper - lower + 1; 
break;
   223             case 1: num_cpus += 1; 
break;
   227     return (num_cpus > 0) ? num_cpus : 1;
   230 #elif defined(_SC_NPROCESSORS_ONLN)   233     int n = sysconf(_SC_NPROCESSORS_ONLN);
   234     return (n > 0) ? n : 1;
   239 static atomic<do_once_state> hardware_concurrency_info;
   241 static const WORD TBB_ALL_PROCESSOR_GROUPS = 0xffff;
   245 static const WORD MaxProcessorGroups = 64;
   247 struct ProcessorGroupInfo {
   250     int         numProcsRunningTotal;   
   253     static int NumGroups;
   262     static int HoleIndex;
   265 int ProcessorGroupInfo::NumGroups = 1;
   266 int ProcessorGroupInfo::HoleIndex = 0;
   268 ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups];
   270 struct TBB_GROUP_AFFINITY {
   276 static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = NULL;
   277 static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = NULL;
   278 static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread,
   279                         const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff );
   280 static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* );
   283       DLD(GetActiveProcessorCount, TBB_GetActiveProcessorCount)
   284     , 
DLD(GetActiveProcessorGroupCount, TBB_GetActiveProcessorGroupCount)
   285     , 
DLD(SetThreadGroupAffinity, TBB_SetThreadGroupAffinity)
   286     , 
DLD(GetThreadGroupAffinity, TBB_GetThreadGroupAffinity)
   289 static void initialize_hardware_concurrency_info () {
   290 #if __TBB_WIN8UI_SUPPORT   293     theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency();
   295     dynamic_link( 
"Kernel32.dll", ProcessorGroupsApiLinkTable,
   298     GetNativeSystemInfo(&si);
   299     DWORD_PTR pam, sam, m = 1;
   300     GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam );
   302     for ( 
size_t i = 0; i < 
sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) {
   306     __TBB_ASSERT( nproc <= (
int)si.dwNumberOfProcessors, NULL );
   308     theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc;
   310     if ( nproc == (
int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) {
   312         ProcessorGroupInfo::NumGroups = (
int)TBB_GetActiveProcessorGroupCount();
   313         __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL );
   316         if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups )
   317             ProcessorGroupInfo::NumGroups = MaxProcessorGroups;
   318         if ( ProcessorGroupInfo::NumGroups > 1 ) {
   319             TBB_GROUP_AFFINITY ga;
   320             if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) )
   321                 ProcessorGroupInfo::HoleIndex = ga.Group;
   323             for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) {
   324                 ProcessorGroupInfo  &pgi = theProcessorGroups[i];
   325                 pgi.numProcs = (
int)TBB_GetActiveProcessorCount(i);
   326                 __TBB_ASSERT( pgi.numProcs <= (
int)
sizeof(DWORD_PTR) * CHAR_BIT, NULL );
   327                 pgi.mask = pgi.numProcs == 
sizeof(DWORD_PTR) * CHAR_BIT ? ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1;
   328                 pgi.numProcsRunningTotal = nprocs += pgi.numProcs;
   330             __TBB_ASSERT( nprocs == (
int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL );
   336     if (ProcessorGroupInfo::NumGroups>1)
   337         for (
int i=0; i<ProcessorGroupInfo::NumGroups; ++i)
   341 int NumberOfProcessorGroups() {
   343     return ProcessorGroupInfo::NumGroups;
   347 #define HoleAdjusted(procIdx, grpIdx) (procIdx + (holeIdx <= grpIdx))   349 int FindProcessorGroupIndex ( 
int procIdx ) {
   352     const int numProcs = theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
   353     if ( procIdx >= numProcs - 1 ) {
   355         procIdx = (procIdx - numProcs + 1) % numProcs;
   358         holeIdx = ProcessorGroupInfo::HoleIndex;
   361     int i = procIdx / theProcessorGroups[0].numProcs;
   363     if (i >= ProcessorGroupInfo::NumGroups) i = ProcessorGroupInfo::NumGroups-1;
   365     if ( theProcessorGroups[i].numProcsRunningTotal > HoleAdjusted(procIdx, i) ) {
   366         while ( theProcessorGroups[i].numProcsRunningTotal - theProcessorGroups[i].numProcs > HoleAdjusted(procIdx, i) ) {
   374         } 
while ( theProcessorGroups[i].numProcsRunningTotal <= HoleAdjusted(procIdx, i) );
   376     __TBB_ASSERT( i < ProcessorGroupInfo::NumGroups, NULL );
   380 void MoveThreadIntoProcessorGroup( 
void* hThread, 
int groupIndex ) {
   382     if ( !TBB_SetThreadGroupAffinity )
   384     TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} };
   385     TBB_SetThreadGroupAffinity( hThread, &ga, NULL );
   389     atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
   390     return theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
   395     #error AvailableHwConcurrency is not implemented for this OS void dynamic_unlink(dynamic_link_handle)
 
int AvailableHwConcurrency()
Returns maximal parallelism level supported by the current OS configuration.
 
Association between a handler name and location of pointer to it.
 
const int DYNAMIC_LINK_GLOBAL
 
void * dynamic_link_handle
 
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d int
 
void atomic_do_once(const F &initializer, atomic< do_once_state > &state)
One-time initialization function.
 
void protect_affinity_mask(bool)
 
void destroy_process_mask()
 
OPEN_INTERNAL_NAMESPACE bool dynamic_link(const char *, const dynamic_link_descriptor *, size_t, dynamic_link_handle *handle, int)
 
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
 
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int mask
 
#define DLD(s, h)
The helper to construct dynamic_link_descriptor structure.
 
void __TBB_EXPORTED_FUNC runtime_warning(const char *format,...)
Report a runtime warning.
 
void PrintExtraVersionInfo(const char *category, const char *format,...)
Prints arbitrary extra TBB version information on stderr.