Home ⌂Doc Index ◂Up ▴
Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
parallel_scan.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2020 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #ifndef __TBB_parallel_scan_H
18 #define __TBB_parallel_scan_H
19 
20 #define __TBB_parallel_scan_H_include_area
22 
23 #include "task.h"
24 #include "aligned_space.h"
25 #include <new>
26 #include "partitioner.h"
27 
28 namespace tbb {
29 
31 
32 struct pre_scan_tag {
33  static bool is_final_scan() {return false;}
34  operator bool() {return is_final_scan();}
35 };
36 
38 
40  static bool is_final_scan() {return true;}
41  operator bool() {return is_final_scan();}
42 };
43 
45 namespace internal {
46 
48 
49  template<typename Range, typename Body>
50  class final_sum: public task {
51  public:
52  Body my_body;
53  private:
54  aligned_space<Range> my_range;
57  public:
58  final_sum( Body& body_ ) :
59  my_body(body_,split())
60  {
62  }
64  my_range.begin()->~Range();
65  }
66  void finish_construction( const Range& range_, Body* stuff_last_ ) {
67  new( my_range.begin() ) Range(range_);
68  my_stuff_last = stuff_last_;
69  }
70  private:
72  my_body( *my_range.begin(), final_scan_tag() );
73  if( my_stuff_last )
74  my_stuff_last->assign(my_body);
75  return NULL;
76  }
77  };
78 
80 
81  template<typename Range, typename Body>
82  class sum_node: public task {
84  public:
88  private:
93  Range my_range;
94  sum_node( const Range range_, bool left_is_final_ ) :
95  my_stuff_last(NULL),
96  my_left_sum(NULL),
97  my_left(NULL),
98  my_right(NULL),
99  my_left_is_final(left_is_final_),
100  my_range(range_)
101  {
102  // Poison fields that will be set by second pass.
105  }
106  task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
107  if( !n ) {
108  f.recycle_as_child_of( *this );
109  f.finish_construction( range_, stuff_last_ );
110  return &f;
111  } else {
112  n->my_body = &f;
113  n->my_incoming = incoming_;
114  n->my_stuff_last = stuff_last_;
115  return n;
116  }
117  }
119  if( my_body ) {
120  if( my_incoming )
121  my_left_sum->my_body.reverse_join( my_incoming->my_body );
123  sum_node& c = *this;
126  set_ref_count( (a!=NULL)+(b!=NULL) );
127  my_body = NULL;
128  if( a ) spawn(*b);
129  else a = b;
130  return a;
131  } else {
132  return NULL;
133  }
134  }
135  template<typename Range_,typename Body_,typename Partitioner_>
136  friend class start_scan;
137 
138  template<typename Range_,typename Body_>
139  friend class finish_scan;
140  };
141 
143 
144  template<typename Range, typename Body>
145  class finish_scan: public task {
150  public:
153 
155  __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
156  if( my_result.my_left )
157  my_result.my_left_is_final = false;
158  if( my_right_zombie && my_sum )
159  ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
160  __TBB_ASSERT( !my_return_slot, NULL );
163  } else {
164  destroy( my_result );
165  }
166  if( my_right_zombie && !my_sum && !my_result.my_right ) {
167  destroy(*my_right_zombie);
168  my_right_zombie = NULL;
169  }
170  return NULL;
171  }
172 
173  finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
174  my_sum(sum_),
175  my_return_slot(return_slot_),
176  my_right_zombie(NULL),
177  my_result(result_)
178  {
179  __TBB_ASSERT( !my_return_slot, NULL );
180  }
181  };
182 
184 
185  template<typename Range, typename Body, typename Partitioner=simple_partitioner>
186  class start_scan: public task {
197  Range my_range;
198  typename Partitioner::partition_type my_partition;
200  public:
201  start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
202  my_body(parent_.my_body),
203  my_sum(parent_.my_sum),
204  my_return_slot(&return_slot_),
205  my_parent_sum(parent_sum_),
206  my_is_final(parent_.my_is_final),
207  my_is_right_child(false),
208  my_range(parent_.my_range,split()),
209  my_partition(parent_.my_partition,split())
210  {
211  __TBB_ASSERT( !*my_return_slot, NULL );
212  }
213 
214  start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
215  my_body(&body_),
216  my_sum(NULL),
217  my_return_slot(&return_slot_),
218  my_parent_sum(NULL),
219  my_is_final(true),
220  my_is_right_child(false),
221  my_range(range_),
222  my_partition(partitioner_)
223  {
224  __TBB_ASSERT( !*my_return_slot, NULL );
225  }
226 
227  static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
228  if( !range_.empty() ) {
229  typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
230  internal::sum_node<Range,Body>* root = NULL;
231  final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
232  start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
233  /*my_return_slot=*/root,
234  range_,
235  *temp_body,
236  partitioner_ );
237  temp_body->my_body.reverse_join(body_);
238  task::spawn_root_and_wait( pass1 );
239  if( root ) {
240  root->my_body = temp_body;
241  root->my_incoming = NULL;
242  root->my_stuff_last = &body_;
243  task::spawn_root_and_wait( *root );
244  } else {
245  body_.assign(temp_body->my_body);
246  temp_body->finish_construction( range_, NULL );
247  temp_body->destroy(*temp_body);
248  }
249  }
250  }
251  };
252 
253  template<typename Range, typename Body, typename Partitioner>
255  typedef internal::finish_scan<Range,Body> finish_pass1_type;
256  finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
257  // Inspecting p->result.left_sum would ordinarily be a race condition.
258  // But we inspect it only if we are not a stolen task, in which case we
259  // know that task assigning to p->result.left_sum has completed.
260  bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
261  if( treat_as_stolen ) {
262  // Invocation is for right child that has been really stolen or needs to be virtually stolen
263  p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
264  my_is_final = false;
265  }
266  task* next_task = NULL;
267  if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
268  if( my_is_final )
269  (my_body->my_body)( my_range, final_scan_tag() );
270  else if( my_sum )
271  (my_body->my_body)( my_range, pre_scan_tag() );
272  if( my_sum )
273  *my_sum = my_body;
274  __TBB_ASSERT( !*my_return_slot, NULL );
275  } else {
276  sum_node_type* result;
277  if( my_parent_sum )
278  result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
279  else
280  result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
281  finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
282  // Split off right child
283  start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
284  b.my_is_right_child = true;
285  // Left child is recycling of *this. Must recycle this before spawning b,
286  // otherwise b might complete and decrement c.ref_count() to zero, which
287  // would cause c.execute() to run prematurely.
288  recycle_as_child_of(c);
289  c.set_ref_count(2);
290  c.spawn(b);
291  my_sum = &result->my_left_sum;
292  my_return_slot = &result->my_left;
293  my_is_right_child = false;
294  next_task = this;
295  my_parent_sum = result;
296  __TBB_ASSERT( !*my_return_slot, NULL );
297  }
298  return next_task;
299  }
300 
301  template<typename Range, typename Value, typename Scan, typename ReverseJoin>
303  Value my_sum;
304  const Value& identity_element;
305  const Scan& my_scan;
306  const ReverseJoin& my_reverse_join;
307  public:
308  lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
309  : my_sum(identity)
310  , identity_element(identity)
311  , my_scan(scan)
312  , my_reverse_join(rev_join) {}
313 
317  , my_scan(b.my_scan)
319 
320  template<typename Tag>
321  void operator()( const Range& r, Tag tag ) {
322  my_sum = my_scan(r, my_sum, tag);
323  }
324 
327  }
328 
330  my_sum = b.my_sum;
331  }
332 
333  Value result() const {
334  return my_sum;
335  }
336  };
337 } // namespace internal
339 
340 // Requirements on Range concept are documented in blocked_range.h
341 
359 
361 
362 template<typename Range, typename Body>
363 void parallel_scan( const Range& range, Body& body ) {
365 }
366 
368 
369 template<typename Range, typename Body>
370 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
372 }
373 
375 
376 template<typename Range, typename Body>
377 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
379 }
380 
382 
383 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
384 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
385  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
387  return body.result();
388 }
389 
391 
392 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
393 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
394  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
395  tbb::parallel_scan(range,body,partitioner);
396  return body.result();
397 }
398 
400 
401 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
402 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
403  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
404  tbb::parallel_scan(range,body,partitioner);
405  return body.result();
406 }
407 
409 
410 } // namespace tbb
411 
413 #undef __TBB_parallel_scan_H_include_area
414 
415 #endif /* __TBB_parallel_scan_H */
416 
sum_node< Range, Body > sum_node_type
static bool is_final_scan()
Definition: parallel_scan.h:40
final_sum_type * my_body
Definition: parallel_scan.h:86
Base class for user-defined tasks.
Definition: task.h:615
sum_node< Range, Body > sum_node_type
Used to indicate that the final scan is being performed.
Definition: parallel_scan.h:39
final_sum_type ** my_sum
Body * my_stuff_last
Where to put result of last subrange, or NULL if not last subrange.
Definition: parallel_scan.h:56
void parallel_scan(const Range &range, Body &body)
Parallel prefix with default partitioner.
Base class for types that should not be assigned.
Definition: tbb_stddef.h:322
void operator()(const Range &r, Tag tag)
final_sum_type * my_body
finish_scan(sum_node_type *&return_slot_, final_sum_type **sum_, sum_node_type &result_)
#define __TBB_DEFAULT_PARTITIONER
Definition: tbb_config.h:596
Split work to be done in the scan.
Definition: parallel_scan.h:82
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id parent
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition: task.h:663
void recycle_as_continuation()
Change this to be a continuation of its former self.
Definition: task.h:711
lambda_scan_body(const Value &identity, const Scan &scan, const ReverseJoin &rev_join)
start_scan(sum_node_type *&return_slot_, const Range &range_, final_sum_type &body_, const Partitioner &partitioner_)
final_sum_type * my_left_sum
Definition: parallel_scan.h:89
task * create_child(const Range &range_, final_sum_type &f, sum_node *n, final_sum_type *incoming_, Body *stuff_last_)
Used to indicate that the initial scan is being performed.
Definition: parallel_scan.h:32
void finish_construction(const Range &range_, Body *stuff_last_)
Definition: parallel_scan.h:66
aligned_space< Range > my_range
Definition: parallel_scan.h:54
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition: task.h:808
void const char const char int ITT_FORMAT __itt_group_sync p
void reverse_join(lambda_scan_body &a)
static void run(const Range &range_, Body &body_, const Partitioner &partitioner_)
Initial task to split the work.
lambda_scan_body(lambda_scan_body &b, split)
A simple partitioner.
Definition: partitioner.h:586
final_sum< Range, Body > final_sum_type
Definition: parallel_scan.h:83
Performs final scan for a leaf.
Definition: parallel_scan.h:50
void recycle_as_child_of(task &new_parent)
Change this to be a child of new_parent.
Definition: task.h:725
task * execute() __TBB_override
Should be overridden by derived classes.
Dummy type that distinguishes splitting constructor from copy constructor.
Definition: tbb_stddef.h:416
sum_node_type ** my_return_slot
sum_node_type *& my_return_slot
int ref_count() const
The internal reference count.
Definition: task.h:915
void set_ref_count(int count)
Set reference count.
Definition: task.h:761
Partitioner::partition_type my_partition
An auto partitioner.
Definition: partitioner.h:613
#define __TBB_override
Definition: tbb_stddef.h:240
Combine partial results.
static bool is_final_scan()
Definition: parallel_scan.h:33
task * execute() __TBB_override
Should be overridden by derived classes.
task * execute() __TBB_override
Should be overridden by derived classes.
final_sum_type * my_incoming
Definition: parallel_scan.h:85
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
const ReverseJoin & my_reverse_join
final_sum< Range, Body > final_sum_type
final_sum_type **const my_sum
sum_node(const Range range_, bool left_is_final_)
Definition: parallel_scan.h:94
void poison_pointer(T *__TBB_atomic &)
Definition: tbb_stddef.h:305
sum_node_type * my_parent_sum
The graph class.
final_sum< Range, Body > final_sum_type
task * execute() __TBB_override
Should be overridden by derived classes.
Definition: parallel_scan.h:71
final_sum_type * my_right_zombie
void assign(lambda_scan_body &b)

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.