Home ⌂Doc Index ◂Up ▴
Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
flow_graph_opencl_node.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2020 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
18 
19 #if !defined(__TBB_show_deprecation_message_flow_graph_opencl_node_H) && defined(__TBB_show_deprecated_header_message)
20 #define __TBB_show_deprecation_message_flow_graph_opencl_node_H
21 #pragma message("TBB Warning: tbb/flow_graph_opencl_node.h is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
22 #endif
23 
24 #if defined(__TBB_show_deprecated_header_message)
25 #undef __TBB_show_deprecated_header_message
26 #endif
27 
28 #ifndef __TBB_flow_graph_opencl_node_H
29 #define __TBB_flow_graph_opencl_node_H
30 
31 #define __TBB_flow_graph_opencl_node_H_include_area
33 
34 #include "tbb/tbb_config.h"
35 #if __TBB_PREVIEW_OPENCL_NODE
36 
37 #include "flow_graph.h"
38 
39 #include <vector>
40 #include <string>
41 #include <algorithm>
42 #include <iostream>
43 #include <fstream>
44 #include <map>
45 #include <mutex>
46 
47 #ifdef __APPLE__
48 #include <OpenCL/opencl.h>
49 #else
50 #include <CL/cl.h>
51 #endif
52 
53 namespace tbb {
54 namespace flow {
55 
56 namespace interface11 {
57 
58 template <typename DeviceFilter>
60 
61 namespace opencl_info {
63 }
64 
65 template <typename Factory>
67 
68 inline void enforce_cl_retcode(cl_int err, std::string msg) {
69  if (err != CL_SUCCESS) {
70  std::cerr << msg << "; error code: " << err << std::endl;
71  throw msg;
72  }
73 }
74 
75 template <typename T>
76 T event_info(cl_event e, cl_event_info i) {
77  T res;
78  enforce_cl_retcode(clGetEventInfo(e, i, sizeof(res), &res, NULL), "Failed to get OpenCL event information");
79  return res;
80 }
81 
82 template <typename T>
83 T device_info(cl_device_id d, cl_device_info i) {
84  T res;
85  enforce_cl_retcode(clGetDeviceInfo(d, i, sizeof(res), &res, NULL), "Failed to get OpenCL device information");
86  return res;
87 }
88 
89 template <>
90 inline std::string device_info<std::string>(cl_device_id d, cl_device_info i) {
91  size_t required;
92  enforce_cl_retcode(clGetDeviceInfo(d, i, 0, NULL, &required), "Failed to get OpenCL device information");
93 
94  char *buff = (char*)alloca(required);
95  enforce_cl_retcode(clGetDeviceInfo(d, i, required, buff, NULL), "Failed to get OpenCL device information");
96 
97  return buff;
98 }
99 
100 template <typename T>
101 T platform_info(cl_platform_id p, cl_platform_info i) {
102  T res;
103  enforce_cl_retcode(clGetPlatformInfo(p, i, sizeof(res), &res, NULL), "Failed to get OpenCL platform information");
104  return res;
105 }
106 
107 template <>
108 inline std::string platform_info<std::string>(cl_platform_id p, cl_platform_info i) {
109  size_t required;
110  enforce_cl_retcode(clGetPlatformInfo(p, i, 0, NULL, &required), "Failed to get OpenCL platform information");
111 
112  char *buff = (char*)alloca(required);
113  enforce_cl_retcode(clGetPlatformInfo(p, i, required, buff, NULL), "Failed to get OpenCL platform information");
114 
115  return buff;
116 }
117 
118 
120 public:
121  typedef size_t device_id_type;
122  enum : device_id_type {
123  unknown = device_id_type( -2 ),
124  host = device_id_type( -1 )
125  };
126 
127  opencl_device() : my_device_id( unknown ), my_cl_device_id( NULL ), my_cl_command_queue( NULL ) {}
128 
129  opencl_device( cl_device_id d_id ) : my_device_id( unknown ), my_cl_device_id( d_id ), my_cl_command_queue( NULL ) {}
130 
131  opencl_device( cl_device_id cl_d_id, device_id_type device_id ) : my_device_id( device_id ), my_cl_device_id( cl_d_id ), my_cl_command_queue( NULL ) {}
132 
133  std::string platform_profile() const {
134  return platform_info<std::string>( platform_id(), CL_PLATFORM_PROFILE );
135  }
136  std::string platform_version() const {
137  return platform_info<std::string>( platform_id(), CL_PLATFORM_VERSION );
138  }
139  std::string platform_name() const {
140  return platform_info<std::string>( platform_id(), CL_PLATFORM_NAME );
141  }
142  std::string platform_vendor() const {
143  return platform_info<std::string>( platform_id(), CL_PLATFORM_VENDOR );
144  }
145  std::string platform_extensions() const {
146  return platform_info<std::string>( platform_id(), CL_PLATFORM_EXTENSIONS );
147  }
148 
149  template <typename T>
150  void info( cl_device_info i, T &t ) const {
151  t = device_info<T>( my_cl_device_id, i );
152  }
153  std::string version() const {
154  // The version string format: OpenCL<space><major_version.minor_version><space><vendor-specific information>
155  return device_info<std::string>( my_cl_device_id, CL_DEVICE_VERSION );
156  }
157  int major_version() const {
158  int major;
159  std::sscanf( version().c_str(), "OpenCL %d", &major );
160  return major;
161  }
162  int minor_version() const {
163  int major, minor;
164  std::sscanf( version().c_str(), "OpenCL %d.%d", &major, &minor );
165  return minor;
166  }
168 #if CL_VERSION_2_0
169  if ( major_version() >= 2 )
170  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
171  else
172 #endif /* CL_VERSION_2_0 */
173  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
174  }
176 #if CL_VERSION_2_0
177  if ( major_version() >= 2 )
178  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
179  else
180 #endif /* CL_VERSION_2_0 */
181  return false;
182  }
183  std::array<size_t, 3> max_work_item_sizes() const {
184  return device_info<std::array<size_t, 3>>( my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES );
185  }
186  size_t max_work_group_size() const {
187  return device_info<size_t>( my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE );
188  }
189  bool built_in_kernel_available( const std::string& k ) const {
190  const std::string semi = ";";
191  // Added semicolumns to force an exact match (to avoid a partial match, e.g. "add" is partly matched with "madd").
192  return (semi + built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos;
193  }
194  std::string built_in_kernels() const {
195  return device_info<std::string>( my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS );
196  }
197  std::string name() const {
198  return device_info<std::string>( my_cl_device_id, CL_DEVICE_NAME );
199  }
200  cl_bool available() const {
201  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_AVAILABLE );
202  }
203  cl_bool compiler_available() const {
204  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE );
205  }
206  cl_bool linker_available() const {
207  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE );
208  }
209  bool extension_available( const std::string &ext ) const {
210  const std::string space = " ";
211  // Added space to force an exact match (to avoid a partial match, e.g. "ext" is partly matched with "ext2").
212  return (space + extensions() + space).find( space + ext + space ) != std::string::npos;
213  }
214  std::string extensions() const {
215  return device_info<std::string>( my_cl_device_id, CL_DEVICE_EXTENSIONS );
216  }
217 
218  cl_device_type type() const {
219  return device_info<cl_device_type>( my_cl_device_id, CL_DEVICE_TYPE );
220  }
221 
222  std::string vendor() const {
223  return device_info<std::string>( my_cl_device_id, CL_DEVICE_VENDOR );
224  }
225 
226  cl_uint address_bits() const {
227  return device_info<cl_uint>( my_cl_device_id, CL_DEVICE_ADDRESS_BITS );
228  }
229 
230  cl_device_id device_id() const {
231  return my_cl_device_id;
232  }
233 
234  cl_command_queue command_queue() const {
235  return my_cl_command_queue;
236  }
237 
238  void set_command_queue( cl_command_queue cmd_queue ) {
239  my_cl_command_queue = cmd_queue;
240  }
241 
242  cl_platform_id platform_id() const {
243  return device_info<cl_platform_id>( my_cl_device_id, CL_DEVICE_PLATFORM );
244  }
245 
246 private:
247 
249  cl_device_id my_cl_device_id;
250  cl_command_queue my_cl_command_queue;
251 
252  friend bool operator==(opencl_device d1, opencl_device d2) { return d1.my_cl_device_id == d2.my_cl_device_id; }
253 
254  template <typename DeviceFilter>
255  friend class opencl_factory;
256  template <typename Factory>
257  friend class opencl_memory;
258  template <typename Factory>
259  friend class opencl_program;
260 
261 #if TBB_USE_ASSERT
262  template <typename T, typename Factory>
263  friend class opencl_buffer;
264 #endif
265 };
266 
268  typedef std::vector<opencl_device> container_type;
269 public:
270  typedef container_type::iterator iterator;
271  typedef container_type::const_iterator const_iterator;
272  typedef container_type::size_type size_type;
273 
275  opencl_device_list( std::initializer_list<opencl_device> il ) : my_container( il ) {}
276 
277  void add( opencl_device d ) { my_container.push_back( d ); }
278  size_type size() const { return my_container.size(); }
279  bool empty() const { return my_container.empty(); }
280  iterator begin() { return my_container.begin(); }
281  iterator end() { return my_container.end(); }
282  const_iterator begin() const { return my_container.begin(); }
283  const_iterator end() const { return my_container.end(); }
284  const_iterator cbegin() const { return my_container.cbegin(); }
285  const_iterator cend() const { return my_container.cend(); }
286 
287 private:
289 };
290 
291 namespace internal {
292 
293 // Retrieve all OpenCL devices from machine
295  opencl_device_list opencl_devices;
296 
297  cl_uint num_platforms;
298  enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms), "clGetPlatformIDs failed");
299 
300  std::vector<cl_platform_id> platforms(num_platforms);
301  enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL), "clGetPlatformIDs failed");
302 
303  cl_uint num_devices;
304  std::vector<cl_platform_id>::iterator platforms_it = platforms.begin();
305  cl_uint num_all_devices = 0;
306  while (platforms_it != platforms.end()) {
307  cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
308  if (err == CL_DEVICE_NOT_FOUND) {
309  platforms_it = platforms.erase(platforms_it);
310  }
311  else {
312  enforce_cl_retcode(err, "clGetDeviceIDs failed");
313  num_all_devices += num_devices;
314  ++platforms_it;
315  }
316  }
317 
318  std::vector<cl_device_id> devices(num_all_devices);
319  std::vector<cl_device_id>::iterator devices_it = devices.begin();
320  for (auto p = platforms.begin(); p != platforms.end(); ++p) {
321  enforce_cl_retcode(clGetDeviceIDs((*p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices), "clGetDeviceIDs failed");
322  devices_it += num_devices;
323  }
324 
325  for (auto d = devices.begin(); d != devices.end(); ++d) {
326  opencl_devices.add(opencl_device((*d)));
327  }
328 
329  return opencl_devices;
330 }
331 
332 } // namespace internal
333 
334 // TODO: consider this namespace as public API
335 namespace opencl_info {
336 
338  // Static storage for all available OpenCL devices on machine
339  static const opencl_device_list my_devices = internal::find_available_devices();
340  return my_devices;
341  }
342 
343 } // namespace opencl_info
344 
345 
347 public:
348  virtual void call() = 0;
349  virtual ~callback_base() {}
350 };
351 
352 template <typename Callback, typename T>
353 class callback : public callback_base {
354  Callback my_callback;
356 public:
357  callback( Callback c, const T& t ) : my_callback( c ), my_data( t ) {}
358 
360  my_callback( my_data );
361  }
362 };
363 
364 template <typename T, typename Factory = opencl_info::default_opencl_factory>
366 public:
367  typedef T value_type;
368 
369  opencl_async_msg() : my_callback_flag_ptr( std::make_shared< tbb::atomic<bool>>() ) {
370  my_callback_flag_ptr->store<tbb::relaxed>(false);
371  }
372 
373  explicit opencl_async_msg( const T& data ) : my_data(data), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
374  my_callback_flag_ptr->store<tbb::relaxed>(false);
375  }
376 
377  opencl_async_msg( const T& data, cl_event event ) : my_data(data), my_event(event), my_is_event(true), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
378  my_callback_flag_ptr->store<tbb::relaxed>(false);
379  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
380  }
381 
382  T& data( bool wait = true ) {
383  if ( my_is_event && wait ) {
384  enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
385  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
386  my_is_event = false;
387  }
388  return my_data;
389  }
390 
391  const T& data( bool wait = true ) const {
392  if ( my_is_event && wait ) {
393  enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
394  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
395  my_is_event = false;
396  }
397  return my_data;
398  }
399 
400  opencl_async_msg( const opencl_async_msg &dmsg ) : async_msg<T>(dmsg),
401  my_data(dmsg.my_data), my_event(dmsg.my_event), my_is_event( dmsg.my_is_event ),
402  my_callback_flag_ptr(dmsg.my_callback_flag_ptr)
403  {
404  if ( my_is_event )
405  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
406  }
407 
408  opencl_async_msg( opencl_async_msg &&dmsg ) : async_msg<T>(std::move(dmsg)),
409  my_data(std::move(dmsg.my_data)), my_event(dmsg.my_event), my_is_event(dmsg.my_is_event),
410  my_callback_flag_ptr( std::move(dmsg.my_callback_flag_ptr) )
411  {
412  dmsg.my_is_event = false;
413  }
414 
417 
418  // Release original event
419  if ( my_is_event )
420  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to retain an event" );
421 
422  my_data = dmsg.my_data;
423  my_event = dmsg.my_event;
424  my_is_event = dmsg.my_is_event;
425 
426  // Retain copied event
427  if ( my_is_event )
428  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
429 
430  my_callback_flag_ptr = dmsg.my_callback_flag_ptr;
431  return *this;
432  }
433 
435  if ( my_is_event )
436  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
437  }
438 
439  cl_event const * get_event() const { return my_is_event ? &my_event : NULL; }
440  void set_event( cl_event e ) const {
441  if ( my_is_event ) {
442  cl_command_queue cq = event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE );
443  if ( cq != event_info<cl_command_queue>( e, CL_EVENT_COMMAND_QUEUE ) )
444  enforce_cl_retcode( clFlush( cq ), "Failed to flush an OpenCL command queue" );
445  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
446  }
447  my_is_event = true;
448  my_event = e;
449  clRetainEvent( my_event );
450  }
451 
452  void clear_event() const {
453  if ( my_is_event ) {
454  enforce_cl_retcode( clFlush( event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE ) ), "Failed to flush an OpenCL command queue" );
455  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
456  }
457  my_is_event = false;
458  }
459 
460  template <typename Callback>
461  void register_callback( Callback c ) const {
462  __TBB_ASSERT( my_is_event, "The OpenCL event is not set" );
463  enforce_cl_retcode( clSetEventCallback( my_event, CL_COMPLETE, register_callback_func, new callback<Callback, T>( c, my_data ) ), "Failed to set an OpenCL callback" );
464  }
465 
466  operator T&() { return data(); }
467  operator const T&() const { return data(); }
468 
469 protected:
470  // Overridden in this derived class to inform that
471  // async calculation chain is over
472  void finalize() const __TBB_override {
474  if (! my_callback_flag_ptr->fetch_and_store(true)) {
475  opencl_async_msg a(*this);
476  if (my_is_event) {
477  register_callback([a](const T& t) mutable {
478  a.set(t);
479  });
480  }
481  else {
482  a.set(my_data);
483  }
484  }
485  clear_event();
486  }
487 
488 private:
489  static void CL_CALLBACK register_callback_func( cl_event, cl_int event_command_exec_status, void *data ) {
490  tbb::internal::suppress_unused_warning( event_command_exec_status );
491  __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL );
492  __TBB_ASSERT( data, NULL );
493  callback_base *c = static_cast<callback_base*>(data);
494  c->call();
495  delete c;
496  }
497 
499  mutable cl_event my_event;
500  mutable bool my_is_event = false;
501 
502  std::shared_ptr< tbb::atomic<bool> > my_callback_flag_ptr;
503 };
504 
505 template <typename K, typename T, typename Factory>
508  const T &t = dmsg.data( false );
509  __TBB_STATIC_ASSERT( true, "" );
510  return key_from_message<K, T>( t );
511 }
512 
513 template <typename Factory>
515 public:
517  opencl_memory( Factory &f ) : my_host_ptr( NULL ), my_factory( &f ), my_sending_event_present( false ) {
518  my_curr_device_id = my_factory->devices().begin()->my_device_id;
519  }
520 
521  virtual ~opencl_memory() {
522  if ( my_sending_event_present ) enforce_cl_retcode( clReleaseEvent( my_sending_event ), "Failed to release an event for the OpenCL buffer" );
523  enforce_cl_retcode( clReleaseMemObject( my_cl_mem ), "Failed to release an memory object" );
524  }
525 
526  cl_mem get_cl_mem() const {
527  return my_cl_mem;
528  }
529 
530  void* get_host_ptr() {
531  if ( !my_host_ptr ) {
533  d.data();
534  __TBB_ASSERT( d.data() == my_host_ptr, NULL );
535  }
536  return my_host_ptr;
537  }
538 
539  Factory *factory() const { return my_factory; }
540 
543  if (e) {
545  } else {
547  }
548 
549  // Concurrent receives are prohibited so we do not worry about synchronization.
551  map_memory(*my_factory->devices().begin(), d);
553  my_host_ptr = d.data(false);
554  }
555  // Release the sending event
557  enforce_cl_retcode(clReleaseEvent(my_sending_event), "Failed to release an event");
558  my_sending_event_present = false;
559  }
560  return d;
561  }
562 
564  opencl_device::device_id_type device_id = device.my_device_id;
565  if (!my_factory->is_same_context(my_curr_device_id.load<tbb::acquire>(), device_id)) {
566  {
568  if (!my_factory->is_same_context(my_curr_device_id.load<tbb::relaxed>(), device_id)) {
569  __TBB_ASSERT(my_host_ptr, "The buffer has not been mapped");
571  my_factory->enqueue_unmap_buffer(device, *this, d);
572  my_sending_event = *d.get_event();
574  enforce_cl_retcode(clRetainEvent(my_sending_event), "Failed to retain an event");
575  my_host_ptr = NULL;
576  my_curr_device_id.store<tbb::release>(device_id);
577  }
578  }
580  }
581 
582  // !e means that buffer has come from the host
584 
585  __TBB_ASSERT(!my_host_ptr, "The buffer has not been unmapped");
587  }
588 
590 protected:
591  cl_mem my_cl_mem;
592  tbb::atomic<opencl_device::device_id_type> my_curr_device_id;
593  void* my_host_ptr;
594  Factory *my_factory;
595 
599 };
600 
601 template <typename Factory>
602 class opencl_buffer_impl : public opencl_memory<Factory> {
603  size_t my_size;
604 public:
605  opencl_buffer_impl( size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
606  cl_int err;
607  this->my_cl_mem = clCreateBuffer( this->my_factory->context(), CL_MEM_ALLOC_HOST_PTR, size, NULL, &err );
608  enforce_cl_retcode( err, "Failed to create an OpenCL buffer" );
609  }
610 
611  // The constructor for subbuffers.
612  opencl_buffer_impl( cl_mem m, size_t index, size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
613  cl_int err;
614  cl_buffer_region region = { index, size };
615  this->my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, &region, &err );
616  enforce_cl_retcode( err, "Failed to create an OpenCL subbuffer" );
617  }
618 
619  size_t size() const {
620  return my_size;
621  }
622 
624  this->my_factory->enqueue_map_buffer( device, *this, dmsg );
625  }
626 
627 #if TBB_USE_ASSERT
628  template <typename, typename>
629  friend class opencl_buffer;
630 #endif
631 };
632 
637 };
638 
639 template <typename T, typename Factory = opencl_info::default_opencl_factory>
642 
643 template <typename T, typename Factory = opencl_info::default_opencl_factory>
646 public:
647  typedef cl_mem native_object_type;
649  typedef Factory opencl_factory_type;
650 
651  template<access_type a> using iterator = T*;
652 
653  template <access_type a>
654  iterator<a> access() const {
655  T* ptr = (T*)my_impl->get_host_ptr();
656  __TBB_ASSERT( ptr, NULL );
657  return iterator<a>( ptr );
658  }
659 
660  T* data() const { return &access<read_write>()[0]; }
661 
662  template <access_type a = read_write>
663  iterator<a> begin() const { return access<a>(); }
664 
665  template <access_type a = read_write>
666  iterator<a> end() const { return access<a>()+my_impl->size()/sizeof(T); }
667 
668  size_t size() const { return my_impl->size()/sizeof(T); }
669 
670  T& operator[] ( ptrdiff_t k ) { return begin()[k]; }
671 
673  opencl_buffer( size_t size );
674  opencl_buffer( Factory &f, size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), f ) ) {}
675 
676  cl_mem native_object() const {
677  return my_impl->get_cl_mem();
678  }
679 
680  const opencl_buffer& memory_object() const {
681  return *this;
682  }
683 
684  void send( opencl_device device, opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
685  __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
686  opencl_async_msg<void*, Factory> d = my_impl->send( device, dependency.get_event() );
687  const cl_event *e = d.get_event();
688  if ( e ) dependency.set_event( *e );
689  else dependency.clear_event();
690  }
691  void receive( const opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
692  __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
693  opencl_async_msg<void*, Factory> d = my_impl->receive( dependency.get_event() );
694  const cl_event *e = d.get_event();
695  if ( e ) dependency.set_event( *e );
696  else dependency.clear_event();
697  }
698 
699  opencl_subbuffer<T, Factory> subbuffer( size_t index, size_t size ) const;
700 private:
701  // The constructor for subbuffers.
702  opencl_buffer( Factory &f, cl_mem m, size_t index, size_t size ) : my_impl( std::make_shared<impl_type>( m, index*sizeof(T), size*sizeof(T), f ) ) {}
703 
705 
706  std::shared_ptr<impl_type> my_impl;
707 
708  friend bool operator==(const opencl_buffer<T, Factory> &lhs, const opencl_buffer<T, Factory> &rhs) {
709  return lhs.my_impl == rhs.my_impl;
710  }
711 
712  template <typename>
713  friend class opencl_factory;
714  template <typename, typename>
715  friend class opencl_subbuffer;
716 };
717 
718 template <typename T, typename Factory>
720 opencl_subbuffer : public opencl_buffer<T, Factory> {
722 public:
724  opencl_subbuffer( const opencl_buffer<T, Factory> &owner, size_t index, size_t size ) :
725  opencl_buffer<T, Factory>( *owner.my_impl->factory(), owner.native_object(), index, size ), my_owner( owner ) {}
726 };
727 
728 template <typename T, typename Factory>
730  return opencl_subbuffer<T, Factory>( *this, index, size );
731 }
732 
733 
734 #define is_typedef(type) \
735  template <typename T> \
736  struct is_##type { \
737  template <typename C> \
738  static std::true_type check( typename C::type* ); \
739  template <typename C> \
740  static std::false_type check( ... ); \
741  \
742  static const bool value = decltype(check<T>(0))::value; \
743  }
744 
745 is_typedef( native_object_type );
746 is_typedef( memory_object_type );
747 
748 template <typename T>
749 typename std::enable_if<is_native_object_type<T>::value, typename T::native_object_type>::type get_native_object( const T &t ) {
750  return t.native_object();
751 }
752 
753 template <typename T>
755  return t;
756 }
757 
758 // send_if_memory_object checks if the T type has memory_object_type and call the send method for the object.
759 template <typename T, typename Factory>
761  const T &t = dmsg.data( false );
762  typedef typename T::memory_object_type mem_obj_t;
763  mem_obj_t mem_obj = t.memory_object();
765  if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
766  mem_obj.send( device, d );
767  if ( d.get_event() ) dmsg.set_event( *d.get_event() );
768 }
769 
770 template <typename T>
772  typedef typename T::memory_object_type mem_obj_t;
773  mem_obj_t mem_obj = t.memory_object();
775  mem_obj.send( device, dmsg );
776 }
777 
778 template <typename T>
780 
781 // receive_if_memory_object checks if the T type has memory_object_type and call the receive method for the object.
782 template <typename T, typename Factory>
784  const T &t = dmsg.data( false );
785  typedef typename T::memory_object_type mem_obj_t;
786  mem_obj_t mem_obj = t.memory_object();
788  if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
789  mem_obj.receive( d );
790  if ( d.get_event() ) dmsg.set_event( *d.get_event() );
791 }
792 
793 template <typename T>
795 
797 public:
798  typedef size_t range_index_type;
799  typedef std::array<range_index_type, 3> nd_range_type;
800 
801  template <typename G = std::initializer_list<int>, typename L = std::initializer_list<int>,
802  typename = typename std::enable_if<!std::is_same<typename std::decay<G>::type, opencl_range>::value>::type>
803  opencl_range(G&& global_work = std::initializer_list<int>({ 0 }), L&& local_work = std::initializer_list<int>({ 0, 0, 0 })) {
804  auto g_it = global_work.begin();
805  auto l_it = local_work.begin();
806  my_global_work_size = { {size_t(-1), size_t(-1), size_t(-1)} };
807  // my_local_work_size is still uninitialized
808  for (int s = 0; s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++s) {
809  __TBB_ASSERT(l_it != local_work.end(), "global_work & local_work must have same size");
810  my_global_work_size[s] = *g_it;
811  my_local_work_size[s] = *l_it;
812  }
813  }
814 
815  const nd_range_type& global_range() const { return my_global_work_size; }
816  const nd_range_type& local_range() const { return my_local_work_size; }
817 
818 private:
821 };
822 
823 template <typename DeviceFilter>
825 public:
828 
830  public:
831  kernel( const kernel& k ) : my_factory( k.my_factory ) {
832  // Clone my_cl_kernel via opencl_program
833  size_t ret_size = 0;
834 
835  std::vector<char> kernel_name;
836  for ( size_t curr_size = 32;; curr_size <<= 1 ) {
837  kernel_name.resize( curr_size <<= 1 );
838  enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ), "Failed to get kernel info" );
839  if ( ret_size < curr_size ) break;
840  }
841 
842  cl_program program;
843  enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, &ret_size ), "Failed to get kernel info" );
844  __TBB_ASSERT( ret_size == sizeof(program), NULL );
845 
846  my_cl_kernel = opencl_program< factory_type >( my_factory, program ).get_cl_kernel( kernel_name.data() );
847  }
848 
850  enforce_cl_retcode( clReleaseKernel( my_cl_kernel ), "Failed to release a kernel" );
851  }
852 
853  private:
855 
856  kernel( const cl_kernel& k, factory_type& f ) : my_cl_kernel( k ), my_factory( f ) {}
857 
858  // Data
859  cl_kernel my_cl_kernel;
861 
862  template <typename DeviceFilter_>
863  friend class opencl_factory;
864 
865  template <typename Factory>
866  friend class opencl_program;
867  };
868 
869  typedef kernel kernel_type;
870 
871  // 'range_type' enables kernel_executor with range support
872  // it affects expectations for enqueue_kernel(.....) interface method
874 
877  if ( my_devices.size() ) {
878  for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
879  enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
880  }
881  enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
882  }
883  }
884 
885  bool init( const opencl_device_list &device_list ) {
886  tbb::spin_mutex::scoped_lock lock( my_devices_mutex );
887  if ( !my_devices.size() ) {
888  my_devices = device_list;
889  return true;
890  }
891  return false;
892  }
893 
894 
895 private:
896  template <typename Factory>
898  cl_event const* e1 = dmsg.get_event();
899  cl_event e2;
900  cl_int err;
901  void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
902  e1 == NULL ? 0 : 1, e1, &e2, &err );
903  enforce_cl_retcode( err, "Failed to map a buffer" );
904  dmsg.data( false ) = ptr;
905  dmsg.set_event( e2 );
906  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
907  }
908 
909 
910  template <typename Factory>
912  cl_event const* e1 = dmsg.get_event();
913  cl_event e2;
915  clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
916  "Failed to unmap a buffer" );
917  dmsg.set_event( e2 );
918  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
919  }
920 
921  // --------- Kernel argument & event list helpers --------- //
922  template <size_t NUM_ARGS, typename T>
923  void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>&, int&, int& place, const T& t ) {
924  auto p = get_native_object(t);
925  enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
926  }
927 
928  template <size_t NUM_ARGS, typename T, typename F>
929  void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const opencl_async_msg<T, F>& msg ) {
930  __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
931 
932  const cl_event * const e = msg.get_event();
933  if (e != NULL) {
934  events[num_events++] = *e;
935  }
936 
937  process_one_arg( kernel, events, num_events, place, msg.data(false) );
938  }
939 
940  template <size_t NUM_ARGS, typename T, typename ...Rest>
941  void process_arg_list( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const T& t, const Rest&... args ) {
942  process_one_arg( kernel, events, num_events, place, t );
943  process_arg_list( kernel, events, num_events, place, args... );
944  }
945 
946  template <size_t NUM_ARGS>
947  void process_arg_list( const kernel_type&, std::array<cl_event, NUM_ARGS>&, int&, int& ) {}
948  // ------------------------------------------- //
949  template <typename T>
950  void update_one_arg( cl_event, T& ) {}
951 
952  template <typename T, typename F>
953  void update_one_arg( cl_event e, opencl_async_msg<T, F>& msg ) {
954  msg.set_event( e );
955  }
956 
957  template <typename T, typename ...Rest>
958  void update_arg_list( cl_event e, T& t, Rest&... args ) {
959  update_one_arg( e, t );
960  update_arg_list( e, args... );
961  }
962 
963  void update_arg_list( cl_event ) {}
964  // ------------------------------------------- //
965 public:
966  template <typename ...Args>
967  void send_kernel( opencl_device device, const kernel_type& kernel, const range_type& work_size, Args&... args ) {
968  std::array<cl_event, sizeof...(Args)> events;
969  int num_events = 0;
970  int place = 0;
971  process_arg_list( kernel, events, num_events, place, args... );
972 
973  const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
974 
975  update_arg_list(e, args...);
976 
977  // Release our own reference to cl_event
978  enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
979  }
980 
981  // ------------------------------------------- //
982  template <typename T, typename ...Rest>
983  void send_data(opencl_device device, T& t, Rest&... args) {
984  send_if_memory_object( device, t );
985  send_data( device, args... );
986  }
987 
989  // ------------------------------------------- //
990 
991 private:
992  cl_event send_kernel_impl( opencl_device device, const cl_kernel& kernel,
993  const range_type& work_size, cl_uint num_events, cl_event* event_list ) {
994  const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
995  const typename range_type::nd_range_type& g_size = work_size.global_range();
996  const typename range_type::nd_range_type& l_size = work_size.local_range();
997  cl_uint s;
998  for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
999  cl_event event;
1001  clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
1002  g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
1003  "Failed to enqueue a kernel" );
1004  return event;
1005  }
1006 
1007  // ------------------------------------------- //
1008  template <typename T>
1009  bool get_event_from_one_arg( cl_event&, const T& ) {
1010  return false;
1011  }
1012 
1013  template <typename T, typename F>
1014  bool get_event_from_one_arg( cl_event& e, const opencl_async_msg<T, F>& msg) {
1015  cl_event const *e_ptr = msg.get_event();
1016 
1017  if ( e_ptr != NULL ) {
1018  e = *e_ptr;
1019  return true;
1020  }
1021 
1022  return false;
1023  }
1024 
1025  template <typename T, typename ...Rest>
1026  bool get_event_from_args( cl_event& e, const T& t, const Rest&... args ) {
1027  if ( get_event_from_one_arg( e, t ) ) {
1028  return true;
1029  }
1030 
1031  return get_event_from_args( e, args... );
1032  }
1033 
1034  bool get_event_from_args( cl_event& ) {
1035  return false;
1036  }
1037  // ------------------------------------------- //
1038 
1040  virtual ~finalize_fn() {}
1041  virtual void operator() () {}
1042  };
1043 
1044  template<typename Fn>
1045  struct finalize_fn_leaf : public finalize_fn {
1046  Fn my_fn;
1047  finalize_fn_leaf(Fn fn) : my_fn(fn) {}
1048  void operator() () __TBB_override { my_fn(); }
1049  };
1050 
1051  static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data) {
1052  tbb::internal::suppress_unused_warning(event_command_exec_status);
1053  __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1054 
1055  finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1056  __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1057  (*fn_ptr)();
1058 
1059  // Function pointer was created by 'new' & this callback must be called once only
1060  delete fn_ptr;
1061  }
1062 public:
1063  template <typename FinalizeFn, typename ...Args>
1064  void finalize( opencl_device device, FinalizeFn fn, Args&... args ) {
1065  cl_event e;
1066 
1067  if ( get_event_from_args( e, args... ) ) {
1068  enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1069  new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1070  }
1071 
1072  enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1073  }
1074 
1076  std::call_once( my_once_flag, &opencl_factory::init_once, this );
1077  return my_devices;
1078  }
1079 
1080 private:
1083  // Currently, factory supports only one context so if the both devices are not host it means the are in the same context.
1084  if ( d1 != opencl_device::host && d2 != opencl_device::host )
1085  return true;
1086  return d1 == d2;
1087  }
1088 private:
1089  opencl_factory( const opencl_factory& );
1090  opencl_factory& operator=(const opencl_factory&);
1091 
1092  cl_context context() {
1093  std::call_once( my_once_flag, &opencl_factory::init_once, this );
1094  return my_cl_context;
1095  }
1096 
1097  void init_once() {
1098  {
1099  tbb::spin_mutex::scoped_lock lock(my_devices_mutex);
1100  if (!my_devices.size())
1101  my_devices = DeviceFilter()( opencl_info::available_devices() );
1102  }
1103 
1104  enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1105  cl_platform_id platform_id = my_devices.begin()->platform_id();
1106  for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1107  enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1108 
1109  std::vector<cl_device_id> cl_device_ids;
1110  for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1111  cl_device_ids.push_back((*d).my_cl_device_id);
1112  }
1113 
1114  cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1115  cl_int err;
1116  cl_context ctx = clCreateContext(context_properties,
1117  (cl_uint)cl_device_ids.size(),
1118  cl_device_ids.data(),
1119  NULL, NULL, &err);
1120  enforce_cl_retcode(err, "Failed to create context");
1121  my_cl_context = ctx;
1122 
1123  size_t device_counter = 0;
1124  for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1125  (*d).my_device_id = device_counter++;
1126  cl_int err2;
1127  cl_command_queue cq;
1128 #if CL_VERSION_2_0
1129  if ((*d).major_version() >= 2) {
1130  if ((*d).out_of_order_exec_mode_on_host_present()) {
1131  cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1132  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1133  } else {
1134  cl_queue_properties props[] = { 0 };
1135  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1136  }
1137  } else
1138 #endif
1139  {
1140  cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1141  // Suppress "declared deprecated" warning for the next line.
1142 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1143 #pragma GCC diagnostic push
1144 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1145 #endif
1146 #if _MSC_VER || __INTEL_COMPILER
1147 #pragma warning( push )
1148 #if __INTEL_COMPILER
1149 #pragma warning (disable: 1478)
1150 #else
1151 #pragma warning (disable: 4996)
1152 #endif
1153 #endif
1154  cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1155 #if _MSC_VER || __INTEL_COMPILER
1156 #pragma warning( pop )
1157 #endif
1158 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1159 #pragma GCC diagnostic pop
1160 #endif
1161  }
1162  enforce_cl_retcode(err2, "Failed to create command queue");
1163  (*d).my_cl_command_queue = cq;
1164  }
1165  }
1166 
1167  std::once_flag my_once_flag;
1169  cl_context my_cl_context;
1170 
1172 
1173  template <typename Factory>
1174  friend class opencl_program;
1175  template <typename Factory>
1176  friend class opencl_buffer_impl;
1177  template <typename Factory>
1178  friend class opencl_memory;
1179 }; // class opencl_factory
1180 
1181 // TODO: consider this namespace as public API
1182 namespace opencl_info {
1183 
1184 // Default types
1185 
1186 template <typename Factory>
1189  __TBB_ASSERT(!f.devices().empty(), "No available devices");
1190  return *(f.devices().begin());
1191  }
1192 };
1193 
1196  opencl_device_list dl;
1197  cl_platform_id platform_id = devices.begin()->platform_id();
1198  for (opencl_device_list::const_iterator it = devices.cbegin(); it != devices.cend(); ++it) {
1199  if (it->platform_id() == platform_id) {
1200  dl.add(*it);
1201  }
1202  }
1203  return dl;
1204  }
1205 };
1206 
1207 class default_opencl_factory : public opencl_factory < default_device_filter >, tbb::internal::no_copy {
1208 public:
1210 
1212 
1213 private:
1214  default_opencl_factory() = default;
1215 };
1216 
1219  return default_factory;
1220 }
1221 
1222 } // namespace opencl_info
1223 
1224 template <typename T, typename Factory>
1225 opencl_buffer<T, Factory>::opencl_buffer( size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), opencl_info::default_factory() ) ) {}
1226 
1227 
1229  SOURCE,
1230  PRECOMPILED,
1231  SPIR
1232 };
1233 
1234 template <typename Factory = opencl_info::default_opencl_factory>
1236 public:
1237  typedef typename Factory::kernel_type kernel_type;
1238 
1239  opencl_program( Factory& factory, opencl_program_type type, const std::string& program_name ) : my_factory( factory ), my_type(type) , my_arg_str( program_name) {}
1240  opencl_program( Factory& factory, const char* program_name ) : opencl_program( factory, std::string( program_name ) ) {}
1241  opencl_program( Factory& factory, const std::string& program_name ) : opencl_program( factory, opencl_program_type::SOURCE, program_name ) {}
1242 
1243  opencl_program( opencl_program_type type, const std::string& program_name ) : opencl_program( opencl_info::default_factory(), type, program_name ) {}
1244  opencl_program( const char* program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1245  opencl_program( const std::string& program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1247 
1248  opencl_program( const opencl_program &src ) : my_factory( src.my_factory ), my_type( src.type ), my_arg_str( src.my_arg_str ), my_cl_program( src.my_cl_program ) {
1249  // Set my_do_once_flag to the called state.
1250  std::call_once( my_do_once_flag, [](){} );
1251  }
1252 
1253  kernel_type get_kernel( const std::string& k ) const {
1254  return kernel_type( get_cl_kernel(k), my_factory );
1255  }
1256 
1257 private:
1258  opencl_program( Factory& factory, cl_program program ) : my_factory( factory ), my_cl_program( program ) {
1259  // Set my_do_once_flag to the called state.
1260  std::call_once( my_do_once_flag, [](){} );
1261  }
1262 
1263  cl_kernel get_cl_kernel( const std::string& k ) const {
1264  std::call_once( my_do_once_flag, [this, &k](){ this->init( k ); } );
1265  cl_int err;
1266  cl_kernel kernel = clCreateKernel( my_cl_program, k.c_str(), &err );
1267  enforce_cl_retcode( err, std::string( "Failed to create kernel: " ) + k );
1268  return kernel;
1269  }
1270 
1271  class file_reader {
1272  public:
1273  file_reader( const std::string& filepath ) {
1274  std::ifstream file_descriptor( filepath, std::ifstream::binary );
1275  if ( !file_descriptor.is_open() ) {
1276  std::string str = std::string( "Could not open file: " ) + filepath;
1277  std::cerr << str << std::endl;
1278  throw str;
1279  }
1280  file_descriptor.seekg( 0, file_descriptor.end );
1281  size_t length = size_t( file_descriptor.tellg() );
1282  file_descriptor.seekg( 0, file_descriptor.beg );
1283  my_content.resize( length );
1284  char* begin = &*my_content.begin();
1285  file_descriptor.read( begin, length );
1286  file_descriptor.close();
1287  }
1288  const char* content() { return &*my_content.cbegin(); }
1289  size_t length() { return my_content.length(); }
1290  private:
1291  std::string my_content;
1292  };
1293 
1295  public:
1296  typedef void (CL_CALLBACK *cl_callback_type)(cl_program, void*);
1297  opencl_program_builder( Factory& f, const std::string& name, cl_program program,
1298  cl_uint num_devices, cl_device_id* device_list,
1299  const char* options, cl_callback_type callback,
1300  void* user_data ) {
1301  cl_int err = clBuildProgram( program, num_devices, device_list, options,
1302  callback, user_data );
1303  if( err == CL_SUCCESS )
1304  return;
1305  std::string str = std::string( "Failed to build program: " ) + name;
1306  if ( err == CL_BUILD_PROGRAM_FAILURE ) {
1307  const opencl_device_list &devices = f.devices();
1308  for ( auto d = devices.begin(); d != devices.end(); ++d ) {
1309  std::cerr << "Build log for device: " << (*d).name() << std::endl;
1310  size_t log_size;
1311  cl_int query_err = clGetProgramBuildInfo(
1312  program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
1313  &log_size );
1314  enforce_cl_retcode( query_err, "Failed to get build log size" );
1315  if( log_size ) {
1316  std::vector<char> output;
1317  output.resize( log_size );
1318  query_err = clGetProgramBuildInfo(
1319  program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG,
1320  output.size(), output.data(), NULL );
1321  enforce_cl_retcode( query_err, "Failed to get build output" );
1322  std::cerr << output.data() << std::endl;
1323  } else {
1324  std::cerr << "No build log available" << std::endl;
1325  }
1326  }
1327  }
1328  enforce_cl_retcode( err, str );
1329  }
1330  };
1331 
1333  public:
1334  template<typename Filter>
1335  opencl_device_filter( cl_uint& num_devices, cl_device_id* device_list,
1336  Filter filter, const char* message ) {
1337  for ( cl_uint i = 0; i < num_devices; ++i )
1338  if ( filter(device_list[i]) ) {
1339  device_list[i--] = device_list[--num_devices];
1340  }
1341  if ( !num_devices )
1342  enforce_cl_retcode( CL_DEVICE_NOT_AVAILABLE, message );
1343  }
1344  };
1345 
1346  void init( const std::string& ) const {
1347  cl_uint num_devices;
1348  enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_NUM_DEVICES, sizeof( num_devices ), &num_devices, NULL ),
1349  "Failed to get OpenCL context info" );
1350  if ( !num_devices )
1351  enforce_cl_retcode( CL_DEVICE_NOT_FOUND, "No supported devices found" );
1352  cl_device_id *device_list = (cl_device_id *)alloca( num_devices*sizeof( cl_device_id ) );
1353  enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_DEVICES, num_devices*sizeof( cl_device_id ), device_list, NULL ),
1354  "Failed to get OpenCL context info" );
1355  const char *options = NULL;
1356  switch ( my_type ) {
1358  file_reader fr( my_arg_str );
1359  const char *s[] = { fr.content() };
1360  const size_t l[] = { fr.length() };
1361  cl_int err;
1362  my_cl_program = clCreateProgramWithSource( my_factory.context(), 1, s, l, &err );
1363  enforce_cl_retcode( err, std::string( "Failed to create program: " ) + my_arg_str );
1365  num_devices, device_list,
1366  []( const opencl_device& d ) -> bool {
1367  return !d.compiler_available() || !d.linker_available();
1368  }, "No one device supports building program from sources" );
1370  my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1371  options, /*callback*/ NULL, /*user data*/NULL );
1372  break;
1373  }
1375  options = "-x spir";
1377  file_reader fr( my_arg_str );
1378  std::vector<const unsigned char*> s(
1379  num_devices, reinterpret_cast<const unsigned char*>(fr.content()) );
1380  std::vector<size_t> l( num_devices, fr.length() );
1381  std::vector<cl_int> bin_statuses( num_devices, -1 );
1382  cl_int err;
1383  my_cl_program = clCreateProgramWithBinary( my_factory.context(), num_devices,
1384  device_list, l.data(), s.data(),
1385  bin_statuses.data(), &err );
1386  if( err != CL_SUCCESS ) {
1387  std::string statuses_str;
1388  for (auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) {
1389  statuses_str += std::to_string((*st));
1390  }
1391 
1392  enforce_cl_retcode( err, std::string( "Failed to create program, error " + std::to_string( err ) + " : " ) + my_arg_str +
1393  std::string( ", binary_statuses = " ) + statuses_str );
1394  }
1396  my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1397  options, /*callback*/ NULL, /*user data*/NULL );
1398  break;
1399  }
1400  default:
1401  __TBB_ASSERT( false, "Unsupported program type" );
1402  }
1403  }
1404 
1405  Factory& my_factory;
1407  std::string my_arg_str;
1408  mutable cl_program my_cl_program;
1409  mutable std::once_flag my_do_once_flag;
1410 
1411  template <typename DeviceFilter>
1412  friend class opencl_factory;
1413 
1414  friend class Factory::kernel;
1415 };
1416 
1417 template<typename... Args>
1419 
1420 template<typename JP, typename Factory, typename... Ports>
1422 opencl_node< tuple<Ports...>, JP, Factory > : public streaming_node< tuple<Ports...>, JP, Factory > {
1423  typedef streaming_node < tuple<Ports...>, JP, Factory > base_type;
1424 public:
1425  typedef typename base_type::kernel_type kernel_type;
1426 
1427  opencl_node( graph &g, const kernel_type& kernel )
1428  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1429  {
1430  tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1431  }
1432 
1433  opencl_node( graph &g, const kernel_type& kernel, Factory &f )
1434  : base_type( g, kernel, opencl_info::default_device_selector <Factory >(), f )
1435  {
1436  tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1437  }
1438 
1439  template <typename DeviceSelector>
1440  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d, Factory &f)
1441  : base_type( g, kernel, d, f)
1442  {
1443  tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1444  }
1445 };
1446 
1447 template<typename JP, typename... Ports>
1449 opencl_node< tuple<Ports...>, JP > : public opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > {
1451 public:
1452  typedef typename base_type::kernel_type kernel_type;
1453 
1454  opencl_node( graph &g, const kernel_type& kernel )
1455  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1456  {}
1457 
1458  template <typename DeviceSelector>
1459  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1460  : base_type( g, kernel, d, opencl_info::default_factory() )
1461  {}
1462 };
1463 
1464 template<typename... Ports>
1466 opencl_node< tuple<Ports...> > : public opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > {
1468 public:
1469  typedef typename base_type::kernel_type kernel_type;
1470 
1471  opencl_node( graph &g, const kernel_type& kernel )
1472  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1473  {}
1474 
1475  template <typename DeviceSelector>
1476  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1477  : base_type( g, kernel, d, opencl_info::default_factory() )
1478  {}
1479 };
1480 
1481 } // namespace interfaceX
1482 
1487 using interface11::opencl_buffer;
1489 using interface11::opencl_device;
1490 using interface11::opencl_device_list;
1491 using interface11::opencl_program;
1493 using interface11::opencl_async_msg;
1494 using interface11::opencl_factory;
1495 using interface11::opencl_range;
1496 
1497 } // namespace flow
1498 } // namespace tbb
1499 #endif /* __TBB_PREVIEW_OPENCL_NODE */
1500 
1502 #undef __TBB_flow_graph_opencl_node_H_include_area
1503 
1504 #endif // __TBB_flow_graph_opencl_node_H
opencl_subbuffer(const opencl_buffer< T, Factory > &owner, size_t index, size_t size)
friend bool operator==(opencl_device d1, opencl_device d2)
const opencl_buffer & memory_object() const
void receive(const opencl_async_msg< opencl_buffer, Factory > &dependency) const
void enqueue_unmap_buffer(opencl_device device, opencl_memory< Factory > &memory, opencl_async_msg< void *, Factory > &dmsg)
std::enable_if< is_native_object_type< T >::value, typename T::native_object_type >::type get_native_object(const T &t)
opencl_program(const std::string &program_name)
opencl_device_filter(cl_uint &num_devices, cl_device_id *device_list, Filter filter, const char *message)
T event_info(cl_event e, cl_event_info i)
opencl_async_msg(const T &data, cl_event event)
#define CODEPTR()
K key_from_message(const opencl_async_msg< T, Factory > &dmsg)
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
void finalize(opencl_device device, FinalizeFn fn, Args &... args)
bool get_event_from_one_arg(cl_event &, const T &)
void enqueue_map_buffer(opencl_device device, opencl_buffer_impl< Factory > &buffer, opencl_async_msg< void *, Factory > &dmsg)
opencl_program(Factory &factory, const std::string &program_name)
void map_memory(opencl_device device, opencl_async_msg< void *, Factory > &dmsg) __TBB_override
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s const char ITT_FORMAT s __itt_frame ITT_FORMAT p __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu __itt_counter unsigned long long ITT_FORMAT lu __itt_counter __itt_clock_domain unsigned long long void ITT_FORMAT p const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp begin
bool init(const opencl_device_list &device_list)
opencl_async_msg< void *, Factory > receive(const cl_event *e)
opencl_device(cl_device_id cl_d_id, device_id_type device_id)
void send(opencl_device device, opencl_async_msg< opencl_buffer, Factory > &dependency) const
void set_command_queue(cl_command_queue cmd_queue)
A lock that occupies a single byte.
Definition: spin_mutex.h:39
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
opencl_device_list operator()(const opencl_device_list &devices)
Base class for types that should not be assigned.
Definition: tbb_stddef.h:322
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
kernel(const cl_kernel &k, factory_type &f)
#define __TBB_DEPRECATED_IN_VERBOSE_MODE
Definition: tbb_config.h:647
bool get_event_from_one_arg(cl_event &e, const opencl_async_msg< T, F > &msg)
bool extension_available(const std::string &ext) const
opencl_device_list(std::initializer_list< opencl_device > il)
void suppress_unused_warning(const T1 &)
Utility template function to prevent "unused" warnings by various compilers.
Definition: tbb_stddef.h:398
T device_info(cl_device_id d, cl_device_info i)
opencl_program(Factory &factory, cl_program program)
bool is_same_context(opencl_device::device_id_type d1, opencl_device::device_id_type d2)
opencl_async_msg & operator=(const opencl_async_msg &dmsg)
static void fgt_multiinput_multioutput_node(void *, string_index, void *, void *)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
A stage in a pipeline.
Definition: pipeline.h:64
opencl_node< tuple< Ports... >, queueing, opencl_info::default_opencl_factory > base_type
default_opencl_factory & default_factory()
opencl_async_msg< void *, Factory > send(opencl_device device, const cl_event *e)
const opencl_device_list & available_devices()
T platform_info(cl_platform_id p, cl_platform_info i)
#define __TBB_STATIC_ASSERT(condition, msg)
Definition: tbb_stddef.h:553
opencl_program(Factory &factory, const char *program_name)
class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_subbuffer
tbb::atomic< opencl_device::device_id_type > my_curr_device_id
Release.
Definition: atomic.h:59
void info(cl_device_info i, T &t) const
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const opencl_async_msg< T, F > &msg)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)
void send_data(opencl_device device, T &t, Rest &... args)
friend bool operator==(const opencl_buffer< T, Factory > &lhs, const opencl_buffer< T, Factory > &rhs)
cl_event send_kernel_impl(opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
std::enable_if< is_memory_object_type< T >::value >::type receive_if_memory_object(const opencl_async_msg< T, Factory > &dmsg)
void send_kernel(opencl_device device, const kernel_type &kernel, const range_type &work_size, Args &... args)
void update_one_arg(cl_event e, opencl_async_msg< T, F > &msg)
opencl_async_msg(const opencl_async_msg &dmsg)
void const char const char int ITT_FORMAT __itt_group_sync p
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
opencl_program(opencl_program_type type, const std::string &program_name)
opencl_subbuffer< T, Factory > subbuffer(size_t index, size_t size) const
virtual void map_memory(opencl_device, opencl_async_msg< void *, Factory > &)=0
std::array< range_index_type, 3 > nd_range_type
class __TBB_DEPRECATED streaming_node
Definition: flow_graph.h:303
Represents acquisition of a mutex.
Definition: spin_mutex.h:53
opencl_buffer_impl(cl_mem m, size_t index, size_t size, Factory &f)
void update_arg_list(cl_event e, T &t, Rest &... args)
Base class for types that should not be copied or assigned.
Definition: tbb_stddef.h:330
static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data)
static void CL_CALLBACK register_callback_func(cl_event, cl_int event_command_exec_status, void *data)
opencl_buffer_impl< Factory > impl_type
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
void const char const char int ITT_FORMAT __itt_group_sync x void const char * name
#define __TBB_override
Definition: tbb_stddef.h:240
std::shared_ptr< tbb::atomic< bool > > my_callback_flag_ptr
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t length
opencl_node< tuple< Ports... >, JP, opencl_info::default_opencl_factory > base_type
void const char const char int ITT_FORMAT __itt_group_sync s
is_typedef(native_object_type)
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d, Factory &f)
No ordering.
Definition: atomic.h:61
bool built_in_kernel_available(const std::string &k) const
class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_node
void process_arg_list(const kernel_type &, std::array< cl_event, NUM_ARGS > &, int &, int &)
opencl_buffer(Factory &f, cl_mem m, size_t index, size_t size)
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
const nd_range_type & global_range() const
void enforce_cl_retcode(cl_int err, std::string msg)
Acquire.
Definition: atomic.h:57
std::enable_if< is_memory_object_type< T >::value >::type send_if_memory_object(opencl_device device, opencl_async_msg< T, Factory > &dmsg)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
opencl_range(G &&global_work=std::initializer_list< int >({ 0 }), L &&local_work=std::initializer_list< int >({ 0, 0, 0 }))
The graph class.
std::array< size_t, 3 > max_work_item_sizes() const
opencl_program(Factory &factory, opencl_program_type type, const std::string &program_name)
cl_kernel get_cl_kernel(const std::string &k) const
const nd_range_type & local_range() const
kernel_type get_kernel(const std::string &k) const
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
The graph related classes and functions.
K key_from_message(const T &t)
Definition: flow_graph.h:721
opencl_program_builder(Factory &f, const std::string &name, cl_program program, cl_uint num_devices, cl_device_id *device_list, const char *options, cl_callback_type callback, void *user_data)
void move(tbb_thread &t1, tbb_thread &t2)
Definition: tbb_thread.h:319

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.