Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb_bind.cpp
/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "../tbb/tbb_assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here.
#include "tbb/tbb_stddef.h"

#if _MSC_VER && !__INTEL_COMPILER
#pragma warning( push )
#pragma warning( disable : 4100 )
#endif
#include <hwloc.h>
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning( pop )
#endif

#include <vector>

// Most hwloc calls return a negative exit code on error.
// This macro checks the error codes that are returned from the hwloc interfaces.
#define assertion_hwloc_wrapper(command, ...) \
    __TBB_ASSERT_EX( (command(__VA_ARGS__)) >= 0, "Error occurred during call to hwloc API.");

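// For example, the call
//     assertion_hwloc_wrapper(hwloc_set_cpubind, topology, new_mask, HWLOC_CPUBIND_THREAD);
// expands to
//     __TBB_ASSERT_EX( (hwloc_set_cpubind(topology, new_mask, HWLOC_CPUBIND_THREAD)) >= 0,
//                      "Error occurred during call to hwloc API.");
// __TBB_ASSERT_EX evaluates its predicate even when assertions are disabled, so the
// hwloc call itself is always executed.
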
namespace tbb {
namespace internal {

//------------------------------------------------------------------------
// Information about the hardware that TBB happens to be running on
//------------------------------------------------------------------------
class platform_topology {
    friend class numa_affinity_handler;

    // TODO: add the `my_` prefix to the members
    hwloc_topology_t topology;
    hwloc_cpuset_t   process_cpu_affinity_mask;
    hwloc_nodeset_t  process_node_affinity_mask;
    std::vector<hwloc_cpuset_t> affinity_masks_list;

    std::vector<int> default_concurrency_list;
    std::vector<int> numa_indexes_list;
    int numa_nodes_count;

    enum init_stages { uninitialized,
                       started,
                       topology_allocated,
                       topology_loaded,
                       topology_parsed } initialization_state;

    // Binding threads to NUMA nodes located in other Windows processor groups is
    // allowed only if the machine topology contains several Windows processor groups
    // and the process affinity mask was not limited manually (an affinity mask cannot
    // cross processor group boundaries).
    bool intergroup_binding_allowed(size_t groups_num) { return groups_num > 1; }
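    // (A Windows processor group contains at most 64 logical CPUs, and a thread affinity
    // mask applies within a single group, which is why a single-group topology needs no
    // intergroup binding.)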

    platform_topology() : topology(NULL),
                          process_cpu_affinity_mask(NULL),
                          process_node_affinity_mask(NULL),
                          numa_nodes_count(0),
                          initialization_state(uninitialized) {}

public:
    typedef hwloc_cpuset_t             affinity_mask;
    typedef hwloc_const_cpuset_t const_affinity_mask;

    static platform_topology& instance() {
        static platform_topology topology;
        return topology;
    }

    bool is_topology_parsed() { return initialization_state == topology_parsed; }

    void initialize( size_t groups_num ) {
        if ( initialization_state != uninitialized )
            return;
        initialization_state = started;

        // Parse topology
        if ( hwloc_topology_init( &topology ) == 0 ) {
            initialization_state = topology_allocated;
            if ( hwloc_topology_load( topology ) == 0 ) {
                initialization_state = topology_loaded;
            }
        }

        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            if ( initialization_state == topology_allocated ) {
                hwloc_topology_destroy(topology);
            }
            numa_nodes_count = 1;
            numa_indexes_list.push_back(-1);
            default_concurrency_list.push_back(-1);
            return;
        }

        // Get the process affinity mask
        if ( intergroup_binding_allowed(groups_num) ) {
            process_cpu_affinity_mask  = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
            process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
        } else {
            process_cpu_affinity_mask  = hwloc_bitmap_alloc();
            process_node_affinity_mask = hwloc_bitmap_alloc();

            assertion_hwloc_wrapper(hwloc_get_cpubind, topology, process_cpu_affinity_mask, 0);
            hwloc_cpuset_to_nodeset(topology, process_cpu_affinity_mask, process_node_affinity_mask);
        }

        // If the system contains no NUMA nodes, HWLOC 1.11 returns an infinitely filled bitmap.
        // hwloc_bitmap_weight() returns a negative value for such bitmaps, so this check is used
        // to switch to a different way of initializing the topology.
        if (hwloc_bitmap_weight(process_node_affinity_mask) < 0) {
            numa_nodes_count = 1;
            numa_indexes_list.push_back(0);
            default_concurrency_list.push_back(hwloc_bitmap_weight(process_cpu_affinity_mask));

            affinity_masks_list.push_back(hwloc_bitmap_dup(process_cpu_affinity_mask));
            initialization_state = topology_parsed;
            return;
        }

        // Get number of available NUMA nodes
        numa_nodes_count = hwloc_bitmap_weight(process_node_affinity_mask);
        __TBB_ASSERT(numa_nodes_count > 0, "Any system must contain one or more NUMA nodes");

        // Get the list of NUMA logical indexes
        unsigned counter = 0;
        int i = 0;
        int max_numa_index = -1;
        numa_indexes_list.resize(numa_nodes_count);
        hwloc_obj_t node_buffer;
        hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
            node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
            numa_indexes_list[counter] = static_cast<int>(node_buffer->logical_index);

            if ( numa_indexes_list[counter] > max_numa_index ) {
                max_numa_index = numa_indexes_list[counter];
            }

            counter++;
        } hwloc_bitmap_foreach_end();
        __TBB_ASSERT(max_numa_index >= 0, "Maximal NUMA index must not be negative");

        // Fill concurrency and affinity masks lists
        default_concurrency_list.resize(max_numa_index + 1);
        affinity_masks_list.resize(max_numa_index + 1);

        int index = 0;
        hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
            node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
            index = static_cast<int>(node_buffer->logical_index);

            hwloc_cpuset_t& current_mask = affinity_masks_list[index];
            current_mask = hwloc_bitmap_dup(node_buffer->cpuset);

            hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
            __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask), "hwloc detected unavailable NUMA node");
            default_concurrency_list[index] = hwloc_bitmap_weight(current_mask);
        } hwloc_bitmap_foreach_end();
        initialization_state = topology_parsed;
    }
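    // For illustration: on a machine with two NUMA nodes, eight logical CPUs each, and an
    // unrestricted process affinity mask, the loops above leave numa_nodes_count == 2,
    // numa_indexes_list == {0, 1}, default_concurrency_list == {8, 8}, and
    // affinity_masks_list holding a duplicated cpuset for each node.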

    ~platform_topology() {
        if ( is_topology_parsed() ) {
            for (int i = 0; i < numa_nodes_count; i++) {
                hwloc_bitmap_free(affinity_masks_list[numa_indexes_list[i]]);
            }
            hwloc_bitmap_free(process_node_affinity_mask);
            hwloc_bitmap_free(process_cpu_affinity_mask);
        }

        if ( initialization_state >= topology_allocated ) {
            hwloc_topology_destroy(topology);
        }

        initialization_state = uninitialized;
    }

    void fill( int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        nodes_count = numa_nodes_count;
        indexes_list = &numa_indexes_list.front();
        concurrency_list = &default_concurrency_list.front();
    }

    affinity_mask allocate_process_affinity_mask() {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        return hwloc_bitmap_dup(process_cpu_affinity_mask);
    }

    void free_affinity_mask( affinity_mask mask_to_free ) {
        hwloc_bitmap_free(mask_to_free); // If bitmap is NULL, no operation is performed.
    }

    void store_current_affinity_mask( affinity_mask current_mask ) {
        assertion_hwloc_wrapper(hwloc_get_cpubind, topology, current_mask, HWLOC_CPUBIND_THREAD);

        hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
        __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask),
                     "Current affinity mask must intersect the process affinity mask");
    }

    void set_new_affinity_mask( const_affinity_mask new_mask ) {
        assertion_hwloc_wrapper(hwloc_set_cpubind, topology, new_mask, HWLOC_CPUBIND_THREAD);
    }

    const_affinity_mask get_node_affinity_mask( int node_index ) {
        __TBB_ASSERT((int)affinity_masks_list.size() > node_index,
                     "Trying to get affinity mask for uninitialized NUMA node");
        return affinity_masks_list[node_index];
    }
};

class binding_handler {
    // The following vector saves the thread affinity mask on scheduler entry so it can be
    // returned to the thread on scheduler exit.
    typedef std::vector<platform_topology::affinity_mask> affinity_masks_container;
    affinity_masks_container affinity_backup;

public:
    binding_handler( size_t size ) : affinity_backup(size) {
        for (affinity_masks_container::iterator it = affinity_backup.begin();
             it != affinity_backup.end(); it++) {
            *it = platform_topology::instance().allocate_process_affinity_mask();
        }
    }

    ~binding_handler() {
        for (affinity_masks_container::iterator it = affinity_backup.begin();
             it != affinity_backup.end(); it++) {
            platform_topology::instance().free_affinity_mask(*it);
        }
    }

    void bind_thread_to_node( unsigned slot_num, unsigned numa_node_id ) {
        __TBB_ASSERT(slot_num < affinity_backup.size(),
                     "The slot number is greater than the number of slots in the arena");
        __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
                     "Trying to get access to uninitialized platform_topology");
        platform_topology::instance().store_current_affinity_mask(affinity_backup[slot_num]);

        platform_topology::instance().set_new_affinity_mask(
            platform_topology::instance().get_node_affinity_mask(numa_node_id));
    }

    void restore_previous_affinity_mask( unsigned slot_num ) {
        __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
                     "Trying to get access to uninitialized platform_topology");
        platform_topology::instance().set_new_affinity_mask(affinity_backup[slot_num]);
    }
};
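// binding_handler is an RAII helper: the constructor allocates one backup affinity mask
// per arena slot, bind_thread_to_node() saves the calling thread's current mask into its
// slot before pinning the thread to the node's cpuset, restore_previous_affinity_mask()
// re-applies the saved mask, and the destructor releases all backup masks.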

extern "C" { // exported to TBB interfaces

void initialize_numa_topology( size_t groups_num,
                               int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
    platform_topology::instance().initialize(groups_num);
    platform_topology::instance().fill(nodes_count, indexes_list, concurrency_list);
}

binding_handler* allocate_binding_handler(int slot_num) {
    __TBB_ASSERT(slot_num > 0, "Trying to create numa handler for 0 threads.");
    return new binding_handler(slot_num);
}

void deallocate_binding_handler(binding_handler* handler_ptr) {
    __TBB_ASSERT(handler_ptr != NULL, "Trying to deallocate NULL pointer.");
    delete handler_ptr;
}

void bind_to_node(binding_handler* handler_ptr, int slot_num, int numa_id) {
    __TBB_ASSERT(handler_ptr != NULL, "Trying to get access to uninitialized metadata.");
    __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
                 "Trying to get access to uninitialized platform_topology.");
    handler_ptr->bind_thread_to_node(slot_num, numa_id);
}

void restore_affinity(binding_handler* handler_ptr, int slot_num) {
    __TBB_ASSERT(handler_ptr != NULL, "Trying to get access to uninitialized metadata.");
    __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
                 "Trying to get access to uninitialized platform_topology.");
    handler_ptr->restore_previous_affinity_mask(slot_num);
}

} // extern "C"

} // namespace internal
} // namespace tbb

#undef assertion_hwloc_wrapper
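
For orientation, the sketch below shows one plausible way a caller (in TBB, the scheduler side of the tbbbind layer) could drive the extern "C" entry points above. The slot count and node choice are illustrative assumptions, not values taken from TBB; when topology parsing fails, the stub path in initialize() leaves indexes_list[0] == -1, so binding should be skipped.

#include <cstddef>

namespace tbb { namespace internal {
class binding_handler;
extern "C" {
    void initialize_numa_topology( size_t groups_num,
                                   int& nodes_count, int*& indexes_list, int*& concurrency_list );
    binding_handler* allocate_binding_handler( int slot_num );
    void deallocate_binding_handler( binding_handler* handler_ptr );
    void bind_to_node( binding_handler* handler_ptr, int slot_num, int numa_id );
    void restore_affinity( binding_handler* handler_ptr, int slot_num );
}
}} // namespaces

int main() {
    using namespace tbb::internal;

    int  nodes_count = 0;
    int* indexes_list = NULL;
    int* concurrency_list = NULL;
    // One processor group assumed (the usual case outside large Windows machines).
    initialize_numa_topology( /*groups_num=*/1, nodes_count, indexes_list, concurrency_list );

    // indexes_list[0] == -1 signals the stub path (topology parsing failed), so skip binding.
    if ( nodes_count > 0 && indexes_list[0] >= 0 ) {
        binding_handler* handler = allocate_binding_handler( /*slot_num=*/4 ); // illustrative arena size
        bind_to_node( handler, /*slot_num=*/0, /*numa_id=*/indexes_list[0] );  // pin this thread
        restore_affinity( handler, /*slot_num=*/0 );                           // then undo it
        deallocate_binding_handler( handler );
    }
    return 0;
}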
