Home ⌂Doc Index ◂Up ▴
Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb/parallel_for.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2020 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #ifndef __TBB_parallel_for_H
18 #define __TBB_parallel_for_H
19 
20 #define __TBB_parallel_for_H_include_area
22 
23 #include <new>
24 #include "task.h"
25 #include "partitioner.h"
26 #include "blocked_range.h"
27 #include "tbb_exception.h"
29 
30 namespace tbb {
31 
32 namespace interface9 {
34 namespace internal {
35 
37  void* allocate_sibling(task* start_for_task, size_t bytes);
38 
40 
41  template<typename Range, typename Body, typename Partitioner>
42  class start_for: public task {
43  Range my_range;
44  const Body my_body;
45  typename Partitioner::task_partition_type my_partition;
47 
50  my_partition.note_affinity( id );
51  }
52 
53  public:
// Constructor for root task.
// Copies the range and the body, and builds this task's partition state
// (Partitioner::task_partition_type) from the supplied partitioner.
55  start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
56  my_range(range),
57  my_body(body),
58  my_partition(partitioner)
59  {
// NULL is passed where the child constructors pass the address of their parent task.
60  tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, NULL);
61  }
63 
// Splitting constructor used to generate children.
// The range and the partition state are constructed from parent_'s members
// together with split_obj; the body is copied from parent_.
64  start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
65  my_range(parent_.my_range, split_obj),
66  my_body(parent_.my_body),
67  my_partition(parent_.my_partition, split_obj)
68  {
69  my_partition.set_affinity(*this);
// Reports the new task to fgt_algorithm together with the task it was split from.
70  tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
71  }
73 
// Construct right child from the given range as response to the demand.
// Unlike the splitting constructor, the range r is copied as-is; only the
// partition state is split off from parent_ (using a default split object).
// d is forwarded to the partition's align_depth().
74  start_for( start_for& parent_, const Range& r, depth_t d ) :
75  my_range(r),
76  my_body(parent_.my_body),
77  my_partition(parent_.my_partition, split())
78  {
79  my_partition.set_affinity(*this);
80  my_partition.align_depth( d );
81  tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
82  }
83  static void run( const Range& range, const Body& body, Partitioner& partitioner ) {
84  if( !range.empty() ) {
85 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
86  start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner);
87 #else
88  // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
89  // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
90  task_group_context context(PARALLEL_FOR);
91  start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
92 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
93  // REGION BEGIN
94  fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
96  fgt_end_algorithm( (void*)&context );
97  // REGION END
98  }
99  }
100 #if __TBB_TASK_GROUP_CONTEXT
101  static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) {
102  if( !range.empty() ) {
103  start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
104  // REGION BEGIN
105  fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
107  fgt_end_algorithm( (void*)&context );
108  // REGION END
109  }
110  }
111 #endif /* __TBB_TASK_GROUP_CONTEXT */
// Run body for range, serves as callback for partitioner.
// The body invocation is bracketed by the fgt_alg_begin_body / fgt_alg_end_body calls.
112  void run_body( Range &r ) {
// my_body is declared const, while the fgt_* functions take void*, hence the const_cast.
114  fgt_alg_begin_body( tbb::internal::PARALLEL_FOR_TASK, (void *)const_cast<Body*>(&(this->my_body)), (void*)this );
115  my_body( r );
116  fgt_alg_end_body( (void *)const_cast<Body*>(&(this->my_body)) );
117  }
118 
// Spawn right task, serves as callback for partitioner.
// Obtains storage from allocate_sibling(), constructs a start_for in it with
// the splitting constructor (*this, split_obj), and spawns the new task.
120  void offer_work(typename Partitioner::split_type& split_obj) {
121  spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, split_obj) );
122  }
// Spawn right task, serves as callback for partitioner.
// Same as the split_obj overload, but the new task is constructed from an
// explicit range r and depth d (default 0) instead of a split object.
124  void offer_work(const Range& r, depth_t d = 0) {
125  spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, r, d) );
126  }
127  };
128 
// Allocate right task with new parent.
// Creates a flag_task as the continuation of start_for_task, makes it the
// parent of start_for_task, and returns raw storage of `bytes` bytes for a new
// child of that flag_task. The caller constructs the sibling task in the
// returned storage (see the placement-new in offer_work).
130  // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined
131  inline void* allocate_sibling(task* start_for_task, size_t bytes) {
132  task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task();
133  start_for_task->set_parent(parent_ptr);
// Presumably one reference for start_for_task and one for the sibling allocated below - TODO confirm.
134  parent_ptr->set_ref_count(2);
135  return &parent_ptr->allocate_child().allocate(bytes);
136  }
137 
139  template<typename Range, typename Body, typename Partitioner>
141  my_partition.check_being_stolen( *this );
142  my_partition.execute(*this, my_range);
143  return NULL;
144  }
145 } // namespace internal
147 } // namespace interface9
148 
150 namespace internal {
152 
154  template<typename Function, typename Index>
156  const Function &my_func;
157  const Index my_begin;
158  const Index my_step;
159  public:
// Stores the function to call plus the first index value and the step.
// my_func is a reference member, so _func must outlive this object;
// _begin and _step are copied.
160  parallel_for_body( const Function& _func, Index& _begin, Index& _step )
161  : my_func(_func), my_begin(_begin), my_step(_step) {}
162 
// Calls my_func once for every iteration number i in [r.begin(), r.end()),
// passing the index value my_begin + i*my_step.
163  void operator()( const tbb::blocked_range<Index>& r ) const {
164  // A set of local variables to help the compiler with vectorization of the following loop.
165  Index b = r.begin();
166  Index e = r.end();
167  Index ms = my_step;
// k starts at the index value of iteration b and is advanced by ms each pass,
// so no multiplication is needed inside the loop.
168  Index k = my_begin + b*ms;
169 
// The pragmas below are emitted for the Intel compiler only.
170 #if __INTEL_COMPILER
171 #pragma ivdep
172 #if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
173 #pragma vector always assert
174 #endif
175 #endif
176  for ( Index i = b; i < e; ++i, k += ms ) {
177  my_func( k );
178  }
179  }
180  };
181 } // namespace internal
183 
184 // Requirements on Range concept are documented in blocked_range.h
185 
196 
198 
199 template<typename Range, typename Body>
200 void parallel_for( const Range& range, const Body& body ) {
202 }
203 
205 
206 template<typename Range, typename Body>
207 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
209 }
210 
212 
213 template<typename Range, typename Body>
214 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
216 }
217 
219 
220 template<typename Range, typename Body>
221 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
223 }
224 
226 
227 template<typename Range, typename Body>
228 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
230 }
231 
232 #if __TBB_TASK_GROUP_CONTEXT
233 
235 template<typename Range, typename Body>
236 void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
238 }
239 
241 
// Parallel iteration over range with simple partitioner and user-supplied context.
// Forwards to internal::start_for<...>::run.
242 template<typename Range, typename Body>
243 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
244  internal::start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
245 }
246 
248 
// Parallel iteration over range with auto partitioner and user-supplied context.
// Forwards to internal::start_for<...>::run.
249 template<typename Range, typename Body>
250 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
251  internal::start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
252 }
253 
255 
// Parallel iteration over range with static partitioner and user-supplied context.
// Forwards to internal::start_for<...>::run.
256 template<typename Range, typename Body>
257 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
258  internal::start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context);
259 }
260 
262 
// Parallel iteration over range with affinity partitioner and user-supplied context.
// The affinity_partitioner is taken by non-const reference, unlike the other partitioners.
263 template<typename Range, typename Body>
264 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
265  internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
266 }
267 #endif /* __TBB_TASK_GROUP_CONTEXT */
268 
269 
270 namespace strict_ppl {
271 
273 template <typename Index, typename Function, typename Partitioner>
275 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
276  if (step <= 0 )
277  internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
278  else if (last > first) {
279  // Above "else" avoids "potential divide by zero" warning on some platforms
280  Index end = (last - first - Index(1)) / step + Index(1);
281  tbb::blocked_range<Index> range(static_cast<Index>(0), end);
283  tbb::parallel_for(range, body, partitioner);
284  }
285 }
286 
// Parallel iteration over a range of integers with a step provided and default partitioner.
// Delegates to parallel_for_impl with a temporary auto_partitioner;
// parallel_for_impl throws when step <= 0.
288 template <typename Index, typename Function>
289 void parallel_for(Index first, Index last, Index step, const Function& f) {
290  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
291 }
// Parallel iteration over a range of integers with a step provided and simple partitioner.
// Delegates to parallel_for_impl, which throws when step <= 0.
293 template <typename Index, typename Function>
294 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
295  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
296 }
// Parallel iteration over a range of integers with a step provided and auto partitioner.
// Delegates to parallel_for_impl, which throws when step <= 0.
298 template <typename Index, typename Function>
299 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
300  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
301 }
// Parallel iteration over a range of integers with a step provided and static partitioner.
// Delegates to parallel_for_impl, which throws when step <= 0.
303 template <typename Index, typename Function>
304 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
305  parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner);
306 }
// Parallel iteration over a range of integers with a step provided and affinity partitioner.
// The partitioner is passed by non-const reference and the template arguments of
// parallel_for_impl are deduced. parallel_for_impl throws when step <= 0.
308 template <typename Index, typename Function>
309 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
310  parallel_for_impl(first, last, step, f, partitioner);
311 }
312 
// Parallel iteration over a range of integers with a step of 1 and default partitioner.
// Delegates to parallel_for_impl with step Index(1) and a temporary auto_partitioner.
314 template <typename Index, typename Function>
315 void parallel_for(Index first, Index last, const Function& f) {
316  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
317 }
// Parallel iteration over a range of integers with a step of 1 and simple partitioner.
// Delegates to parallel_for_impl with step Index(1).
319 template <typename Index, typename Function>
320 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
321  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
322 }
// Parallel iteration over a range of integers with a step of 1 and auto partitioner.
// Delegates to parallel_for_impl with step Index(1).
324 template <typename Index, typename Function>
325 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
326  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
327 }
// Parallel iteration over a range of integers with a step of 1 and static partitioner.
// Delegates to parallel_for_impl with step Index(1).
329 template <typename Index, typename Function>
330 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
331  parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
332 }
// Parallel iteration over a range of integers with a step of 1 and affinity partitioner.
// The partitioner is passed by non-const reference; delegates to parallel_for_impl with step Index(1).
334 template <typename Index, typename Function>
335 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
336  parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
337 }
338 
339 #if __TBB_TASK_GROUP_CONTEXT
340 template <typename Index, typename Function, typename Partitioner>
342 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) {
343  if (step <= 0 )
344  internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
345  else if (last > first) {
346  // Above "else" avoids "potential divide by zero" warning on some platforms
347  Index end = (last - first - Index(1)) / step + Index(1);
348  tbb::blocked_range<Index> range(static_cast<Index>(0), end);
350  tbb::parallel_for(range, body, partitioner, context);
351  }
352 }
353 
// Parallel iteration over a range of integers with a step provided, default partitioner
// and user-supplied context. Delegates to the context-taking parallel_for_impl with a
// temporary auto_partitioner; parallel_for_impl throws when step <= 0.
355 template <typename Index, typename Function>
356 void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
357  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
358 }
// Parallel iteration over a range of integers with a step provided, simple partitioner
// and user-supplied context. Delegates to parallel_for_impl, which throws when step <= 0.
360  template <typename Index, typename Function>
361 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
362  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
363 }
// Parallel iteration over a range of integers with a step provided, auto partitioner
// and user-supplied context. Delegates to parallel_for_impl, which throws when step <= 0.
365  template <typename Index, typename Function>
366 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
367  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
368 }
// Parallel iteration over a range of integers with a step provided, static partitioner
// and user-supplied context. Delegates to parallel_for_impl, which throws when step <= 0.
370 template <typename Index, typename Function>
371 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
372  parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context);
373 }
// Parallel iteration over a range of integers with a step provided, affinity partitioner
// and user-supplied context. The partitioner is passed by non-const reference and the
// template arguments of parallel_for_impl are deduced; it throws when step <= 0.
375  template <typename Index, typename Function>
376 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
377  parallel_for_impl(first, last, step, f, partitioner, context);
378 }
379 
380 
// Parallel iteration over a range of integers with a step of 1, default partitioner
// and user-supplied context. Delegates to parallel_for_impl with step Index(1) and a
// temporary auto_partitioner.
382 template <typename Index, typename Function>
383 void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
384  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
385 }
// Parallel iteration over a range of integers with a step of 1, simple partitioner
// and user-supplied context. Delegates to parallel_for_impl with step Index(1).
387  template <typename Index, typename Function>
388 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
389  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
390 }
// Parallel iteration over a range of integers with a step of 1, auto partitioner
// and user-supplied context. Delegates to parallel_for_impl with step Index(1).
392  template <typename Index, typename Function>
393 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
394  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
395 }
// Parallel iteration over a range of integers with a step of 1, static partitioner
// and user-supplied context. Delegates to parallel_for_impl with step Index(1).
397 template <typename Index, typename Function>
398 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
399  parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
400 }
// Parallel iteration over a range of integers with a step of 1, affinity partitioner
// and user-supplied context. The partitioner is passed by non-const reference;
// delegates to parallel_for_impl with step Index(1).
402  template <typename Index, typename Function>
403 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
404  parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
405 }
406 
407 #endif /* __TBB_TASK_GROUP_CONTEXT */
408 
409 
410 } // namespace strict_ppl
411 
413 
414 } // namespace tbb
415 
416 #if TBB_PREVIEW_SERIAL_SUBSET
417 #define __TBB_NORMAL_EXECUTION
418 #include "../serial/tbb/parallel_for.h"
419 #undef __TBB_NORMAL_EXECUTION
420 #endif
421 
423 #undef __TBB_parallel_for_H_include_area
424 
425 #endif /* __TBB_parallel_for_H */
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp end
Join task node that contains shared flag for stealing feedback.
Definition: partitioner.h:128
Used to form groups of tasks.
Definition: task.h:358
start_for(start_for &parent_, typename Partitioner::split_type &split_obj)
Splitting constructor used to generate children.
Base class for user-defined tasks.
Definition: task.h:615
task * execute() __TBB_override
execute task for parallel_for
static void run(const Range &range, const Body &body, Partitioner &partitioner)
void set_parent(task *p)
sets parent task pointer to specified value
Definition: task.h:868
Partitioner::task_partition_type my_partition
parallel_for_body(const Function &_func, Index &_begin, Index &_step)
virtual task * execute()=0
Should be overridden by derived classes.
void * allocate_sibling(task *start_for_task, size_t bytes)
allocate right task with new parent
void parallel_for(const Range &range, const Body &body)
Parallel iteration over range with default partitioner.
Base class for types that should not be assigned.
Definition: tbb_stddef.h:322
start_for(start_for &parent_, const Range &r, depth_t d)
Construct right child from the given range as response to the demand.
void run_body(Range &r)
Run body for range, serves as callback for partitioner.
#define __TBB_DEFAULT_PARTITIONER
Definition: tbb_config.h:596
auto last(Container &c) -> decltype(begin(c))
static void fgt_algorithm(string_index, void *, void *)
void operator()(const tbb::blocked_range< Index > &r) const
void parallel_for(Index first, Index last, Index step, const Function &f)
Parallel iteration over a range of integers with a step provided and default partitioner.
static void run(const Range &range, const Body &body, Partitioner &partitioner, task_group_context &context)
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition: task.h:663
A static partitioner.
Definition: partitioner.h:632
static void fgt_alg_begin_body(string_index, void *, void *)
start_for(const Range &range, const Body &body, Partitioner &partitioner)
Constructor for root task.
internal::allocate_child_proxy & allocate_child()
Returns proxy for overloaded new that allocates a child task of *this.
Definition: task.h:681
const_iterator begin() const
Beginning of range.
Definition: blocked_range.h:69
task_group_context * context()
This method is deprecated and will be removed in the future.
Definition: task.h:878
static void fgt_alg_end_body(void *)
static void fgt_end_algorithm(void *)
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition: task.h:808
void offer_work(typename Partitioner::split_type &split_obj)
spawn right task, serves as callback for partitioner
internal::allocate_continuation_proxy & allocate_continuation()
Returns proxy for overloaded new that allocates a continuation task of *this.
Definition: task.h:676
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
A simple partitioner.
Definition: partitioner.h:586
void note_affinity(affinity_id id) __TBB_override
Update affinity info, if any.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task * task
Dummy type that distinguishes splitting constructor from copy constructor.
Definition: tbb_stddef.h:416
void set_ref_count(int count)
Set reference count.
Definition: task.h:761
Calls the function with values from range [begin, end) with a step provided.
auto first(Container &c) -> decltype(begin(c))
An auto partitioner.
Definition: partitioner.h:613
#define __TBB_override
Definition: tbb_stddef.h:240
A range over which to iterate.
Definition: blocked_range.h:45
Task type used in parallel_for.
void parallel_for_impl(Index first, Index last, Index step, const Function &f, Partitioner &partitioner)
Implementation of parallel iteration over stepped range of integers with explicit step and partitioner.
const_iterator end() const
One past last value in range.
Definition: blocked_range.h:72
An affinity partitioner.
Definition: partitioner.h:651
void throw_exception(exception_id eid)
Versionless convenience wrapper for throw_exception_v4()
void offer_work(const Range &r, depth_t d=0)
spawn right task, serves as callback for partitioner
The graph class.
static void fgt_begin_algorithm(string_index, void *)
internal::affinity_id affinity_id
An id as used for specifying affinity.
Definition: task.h:940
task &__TBB_EXPORTED_METHOD allocate(size_t size) const
Definition: task.cpp:114

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.