Home ⌂Doc Index ◂Up ▴
Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb::flow::interface11::opencl_factory< DeviceFilter > Class Template Reference

#include <flow_graph_opencl_node.h>

Inheritance diagram for tbb::flow::interface11::opencl_factory< DeviceFilter >:
Collaboration diagram for tbb::flow::interface11::opencl_factory< DeviceFilter >:

Classes

struct  finalize_fn
 
struct  finalize_fn_leaf
 
class  kernel
 

Public Types

template<typename T >
using async_msg_type = opencl_async_msg< T, opencl_factory< DeviceFilter > >
 
typedef opencl_device device_type
 
typedef kernel kernel_type
 
typedef opencl_range range_type
 

Public Member Functions

 opencl_factory ()
 
 ~opencl_factory ()
 
bool init (const opencl_device_list &device_list)
 
template<typename ... Args>
void send_kernel (opencl_device device, const kernel_type &kernel, const range_type &work_size, Args &... args)
 
template<typename T , typename ... Rest>
void send_data (opencl_device device, T &t, Rest &... args)
 
void send_data (opencl_device)
 
template<typename FinalizeFn , typename ... Args>
void finalize (opencl_device device, FinalizeFn fn, Args &... args)
 
const opencl_device_listdevices ()
 

Private Member Functions

template<typename Factory >
void enqueue_map_buffer (opencl_device device, opencl_buffer_impl< Factory > &buffer, opencl_async_msg< void *, Factory > &dmsg)
 
template<typename Factory >
void enqueue_unmap_buffer (opencl_device device, opencl_memory< Factory > &memory, opencl_async_msg< void *, Factory > &dmsg)
 
template<size_t NUM_ARGS, typename T >
void process_one_arg (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
 
template<size_t NUM_ARGS, typename T , typename F >
void process_one_arg (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const opencl_async_msg< T, F > &msg)
 
template<size_t NUM_ARGS, typename T , typename ... Rest>
void process_arg_list (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
 
template<size_t NUM_ARGS>
void process_arg_list (const kernel_type &, std::array< cl_event, NUM_ARGS > &, int &, int &)
 
template<typename T >
void update_one_arg (cl_event, T &)
 
template<typename T , typename F >
void update_one_arg (cl_event e, opencl_async_msg< T, F > &msg)
 
template<typename T , typename ... Rest>
void update_arg_list (cl_event e, T &t, Rest &... args)
 
void update_arg_list (cl_event)
 
cl_event send_kernel_impl (opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
 
template<typename T >
bool get_event_from_one_arg (cl_event &, const T &)
 
template<typename T , typename F >
bool get_event_from_one_arg (cl_event &e, const opencl_async_msg< T, F > &msg)
 
template<typename T , typename ... Rest>
bool get_event_from_args (cl_event &e, const T &t, const Rest &... args)
 
bool get_event_from_args (cl_event &)
 
bool is_same_context (opencl_device::device_id_type d1, opencl_device::device_id_type d2)
 
 opencl_factory (const opencl_factory &)
 
opencl_factoryoperator= (const opencl_factory &)
 
cl_context context ()
 
void init_once ()
 

Static Private Member Functions

static void CL_CALLBACK finalize_callback (cl_event, cl_int event_command_exec_status, void *data)
 

Private Attributes

std::once_flag my_once_flag
 
opencl_device_list my_devices
 
cl_context my_cl_context
 
tbb::spin_mutex my_devices_mutex
 

Friends

template<typename Factory >
class opencl_program
 
template<typename Factory >
class opencl_buffer_impl
 
template<typename Factory >
class opencl_memory
 

Detailed Description

template<typename DeviceFilter>
class tbb::flow::interface11::opencl_factory< DeviceFilter >

Definition at line 59 of file flow_graph_opencl_node.h.

Member Typedef Documentation

◆ async_msg_type

template<typename DeviceFilter>
template<typename T >
using tbb::flow::interface11::opencl_factory< DeviceFilter >::async_msg_type = opencl_async_msg<T, opencl_factory<DeviceFilter> >

Definition at line 826 of file flow_graph_opencl_node.h.

◆ device_type

template<typename DeviceFilter>
typedef opencl_device tbb::flow::interface11::opencl_factory< DeviceFilter >::device_type

Definition at line 827 of file flow_graph_opencl_node.h.

◆ kernel_type

template<typename DeviceFilter>
typedef kernel tbb::flow::interface11::opencl_factory< DeviceFilter >::kernel_type

Definition at line 869 of file flow_graph_opencl_node.h.

◆ range_type

template<typename DeviceFilter>
typedef opencl_range tbb::flow::interface11::opencl_factory< DeviceFilter >::range_type

Definition at line 873 of file flow_graph_opencl_node.h.

Constructor & Destructor Documentation

◆ opencl_factory() [1/2]

template<typename DeviceFilter>
tbb::flow::interface11::opencl_factory< DeviceFilter >::opencl_factory ( )
inline

Definition at line 875 of file flow_graph_opencl_node.h.

875 {}

◆ ~opencl_factory()

template<typename DeviceFilter>
tbb::flow::interface11::opencl_factory< DeviceFilter >::~opencl_factory ( )
inline

Definition at line 876 of file flow_graph_opencl_node.h.

876  {
877  if ( my_devices.size() ) {
878  for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
879  enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
880  }
881  enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
882  }
883  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
void enforce_cl_retcode(cl_int err, std::string msg)

◆ opencl_factory() [2/2]

template<typename DeviceFilter>
tbb::flow::interface11::opencl_factory< DeviceFilter >::opencl_factory ( const opencl_factory< DeviceFilter > &  )
private

Member Function Documentation

◆ context()

template<typename DeviceFilter>
cl_context tbb::flow::interface11::opencl_factory< DeviceFilter >::context ( )
inlineprivate

◆ devices()

template<typename DeviceFilter>
const opencl_device_list& tbb::flow::interface11::opencl_factory< DeviceFilter >::devices ( )
inline

Definition at line 1075 of file flow_graph_opencl_node.h.

◆ enqueue_map_buffer()

template<typename DeviceFilter>
template<typename Factory >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::enqueue_map_buffer ( opencl_device  device,
opencl_buffer_impl< Factory > &  buffer,
opencl_async_msg< void *, Factory > &  dmsg 
)
inlineprivate

Definition at line 897 of file flow_graph_opencl_node.h.

897  {
898  cl_event const* e1 = dmsg.get_event();
899  cl_event e2;
900  cl_int err;
901  void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
902  e1 == NULL ? 0 : 1, e1, &e2, &err );
903  enforce_cl_retcode( err, "Failed to map a buffer" );
904  dmsg.data( false ) = ptr;
905  dmsg.set_event( e2 );
906  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
907  }
void enforce_cl_retcode(cl_int err, std::string msg)

◆ enqueue_unmap_buffer()

template<typename DeviceFilter>
template<typename Factory >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::enqueue_unmap_buffer ( opencl_device  device,
opencl_memory< Factory > &  memory,
opencl_async_msg< void *, Factory > &  dmsg 
)
inlineprivate

Definition at line 911 of file flow_graph_opencl_node.h.

911  {
912  cl_event const* e1 = dmsg.get_event();
913  cl_event e2;
915  clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
916  "Failed to unmap a buffer" );
917  dmsg.set_event( e2 );
918  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
919  }
void enforce_cl_retcode(cl_int err, std::string msg)

◆ finalize()

template<typename DeviceFilter>
template<typename FinalizeFn , typename ... Args>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::finalize ( opencl_device  device,
FinalizeFn  fn,
Args &...  args 
)
inline

Definition at line 1064 of file flow_graph_opencl_node.h.

1064  {
1065  cl_event e;
1066 
1067  if ( get_event_from_args( e, args... ) ) {
1068  enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1069  new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1070  }
1071 
1072  enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1073  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s const char ITT_FORMAT s __itt_frame ITT_FORMAT p __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu __itt_counter unsigned long long ITT_FORMAT lu __itt_counter __itt_clock_domain unsigned long long void ITT_FORMAT p const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)
static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data)
void enforce_cl_retcode(cl_int err, std::string msg)

◆ finalize_callback()

template<typename DeviceFilter>
static void CL_CALLBACK tbb::flow::interface11::opencl_factory< DeviceFilter >::finalize_callback ( cl_event  ,
cl_int  event_command_exec_status,
void data 
)
inlinestaticprivate

Definition at line 1051 of file flow_graph_opencl_node.h.

1051  {
1052  tbb::internal::suppress_unused_warning(event_command_exec_status);
1053  __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1054 
1055  finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1056  __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1057  (*fn_ptr)();
1058 
1059  // Function pointer was created by 'new' & this callback must be called once only
1060  delete fn_ptr;
1061  }
void suppress_unused_warning(const T1 &)
Utility template function to prevent "unused" warnings by various compilers.
Definition: tbb_stddef.h:398
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165

◆ get_event_from_args() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::get_event_from_args ( cl_event &  e,
const T &  t,
const Rest &...  args 
)
inlineprivate

Definition at line 1026 of file flow_graph_opencl_node.h.

1026  {
1027  if ( get_event_from_one_arg( e, t ) ) {
1028  return true;
1029  }
1030 
1031  return get_event_from_args( e, args... );
1032  }
bool get_event_from_one_arg(cl_event &, const T &)
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)

◆ get_event_from_args() [2/2]

template<typename DeviceFilter>
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::get_event_from_args ( cl_event &  )
inlineprivate

Definition at line 1034 of file flow_graph_opencl_node.h.

1034  {
1035  return false;
1036  }

◆ get_event_from_one_arg() [1/2]

template<typename DeviceFilter>
template<typename T >
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::get_event_from_one_arg ( cl_event &  ,
const T &   
)
inlineprivate

Definition at line 1009 of file flow_graph_opencl_node.h.

1009  {
1010  return false;
1011  }

◆ get_event_from_one_arg() [2/2]

template<typename DeviceFilter>
template<typename T , typename F >
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::get_event_from_one_arg ( cl_event &  e,
const opencl_async_msg< T, F > &  msg 
)
inlineprivate

Definition at line 1014 of file flow_graph_opencl_node.h.

1014  {
1015  cl_event const *e_ptr = msg.get_event();
1016 
1017  if ( e_ptr != NULL ) {
1018  e = *e_ptr;
1019  return true;
1020  }
1021 
1022  return false;
1023  }

◆ init()

template<typename DeviceFilter>
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::init ( const opencl_device_list device_list)
inline

Definition at line 885 of file flow_graph_opencl_node.h.

885  {
887  if ( !my_devices.size() ) {
888  my_devices = device_list;
889  return true;
890  }
891  return false;
892  }
Represents acquisition of a mutex.
Definition: spin_mutex.h:53
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock

◆ init_once()

template<typename DeviceFilter>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::init_once ( )
inlineprivate

Definition at line 1097 of file flow_graph_opencl_node.h.

1097  {
1098  {
1100  if (!my_devices.size())
1101  my_devices = DeviceFilter()( opencl_info::available_devices() );
1102  }
1103 
1104  enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1105  cl_platform_id platform_id = my_devices.begin()->platform_id();
1106  for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1107  enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1108 
1109  std::vector<cl_device_id> cl_device_ids;
1110  for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1111  cl_device_ids.push_back((*d).my_cl_device_id);
1112  }
1113 
1114  cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1115  cl_int err;
1116  cl_context ctx = clCreateContext(context_properties,
1117  (cl_uint)cl_device_ids.size(),
1118  cl_device_ids.data(),
1119  NULL, NULL, &err);
1120  enforce_cl_retcode(err, "Failed to create context");
1121  my_cl_context = ctx;
1122 
1123  size_t device_counter = 0;
1124  for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1125  (*d).my_device_id = device_counter++;
1126  cl_int err2;
1127  cl_command_queue cq;
1128 #if CL_VERSION_2_0
1129  if ((*d).major_version() >= 2) {
1130  if ((*d).out_of_order_exec_mode_on_host_present()) {
1131  cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1132  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1133  } else {
1134  cl_queue_properties props[] = { 0 };
1135  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1136  }
1137  } else
1138 #endif
1139  {
1140  cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1141  // Suppress "declared deprecated" warning for the next line.
1142 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1143 #pragma GCC diagnostic push
1144 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1145 #endif
1146 #if _MSC_VER || __INTEL_COMPILER
1147 #pragma warning( push )
1148 #if __INTEL_COMPILER
1149 #pragma warning (disable: 1478)
1150 #else
1151 #pragma warning (disable: 4996)
1152 #endif
1153 #endif
1154  cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1155 #if _MSC_VER || __INTEL_COMPILER
1156 #pragma warning( pop )
1157 #endif
1158 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1159 #pragma GCC diagnostic pop
1160 #endif
1161  }
1162  enforce_cl_retcode(err2, "Failed to create command queue");
1163  (*d).my_cl_command_queue = cq;
1164  }
1165  }
const opencl_device_list & available_devices()
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
Represents acquisition of a mutex.
Definition: spin_mutex.h:53
void enforce_cl_retcode(cl_int err, std::string msg)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock

Referenced by tbb::flow::interface11::opencl_factory< default_device_filter >::context(), and tbb::flow::interface11::opencl_factory< default_device_filter >::devices().

Here is the caller graph for this function:

◆ is_same_context()

template<typename DeviceFilter>
bool tbb::flow::interface11::opencl_factory< DeviceFilter >::is_same_context ( opencl_device::device_id_type  d1,
opencl_device::device_id_type  d2 
)
inlineprivate

Definition at line 1081 of file flow_graph_opencl_node.h.

1081  {
1083  // Currently, factory supports only one context so if the both devices are not host it means the are in the same context.
1084  if ( d1 != opencl_device::host && d2 != opencl_device::host )
1085  return true;
1086  return d1 == d2;
1087  }
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165

◆ operator=()

template<typename DeviceFilter>
opencl_factory& tbb::flow::interface11::opencl_factory< DeviceFilter >::operator= ( const opencl_factory< DeviceFilter > &  )
private

◆ process_arg_list() [1/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T , typename ... Rest>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::process_arg_list ( const kernel_type kernel,
std::array< cl_event, NUM_ARGS > &  events,
int num_events,
int place,
const T &  t,
const Rest &...  args 
)
inlineprivate

Definition at line 941 of file flow_graph_opencl_node.h.

941  {
942  process_one_arg( kernel, events, num_events, place, t );
943  process_arg_list( kernel, events, num_events, place, args... );
944  }
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)

◆ process_arg_list() [2/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::process_arg_list ( const kernel_type ,
std::array< cl_event, NUM_ARGS > &  ,
int ,
int  
)
inlineprivate

Definition at line 947 of file flow_graph_opencl_node.h.

947 {}

◆ process_one_arg() [1/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::process_one_arg ( const kernel_type kernel,
std::array< cl_event, NUM_ARGS > &  ,
int ,
int place,
const T &  t 
)
inlineprivate

Definition at line 923 of file flow_graph_opencl_node.h.

923  {
924  auto p = get_native_object(t);
925  enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
926  }
std::enable_if< is_native_object_type< T >::value, typename T::native_object_type >::type get_native_object(const T &t)
void const char const char int ITT_FORMAT __itt_group_sync p
void enforce_cl_retcode(cl_int err, std::string msg)

◆ process_one_arg() [2/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T , typename F >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::process_one_arg ( const kernel_type kernel,
std::array< cl_event, NUM_ARGS > &  events,
int num_events,
int place,
const opencl_async_msg< T, F > &  msg 
)
inlineprivate

Definition at line 929 of file flow_graph_opencl_node.h.

929  {
930  __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
931 
932  const cl_event * const e = msg.get_event();
933  if (e != NULL) {
934  events[num_events++] = *e;
935  }
936 
937  process_one_arg( kernel, events, num_events, place, msg.data(false) );
938  }
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165

◆ send_data() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::send_data ( opencl_device  device,
T &  t,
Rest &...  args 
)
inline

Definition at line 983 of file flow_graph_opencl_node.h.

983  {
984  send_if_memory_object( device, t );
985  send_data( device, args... );
986  }
void send_data(opencl_device device, T &t, Rest &... args)
std::enable_if< is_memory_object_type< T >::value >::type send_if_memory_object(opencl_device device, opencl_async_msg< T, Factory > &dmsg)

◆ send_data() [2/2]

template<typename DeviceFilter>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::send_data ( opencl_device  )
inline

Definition at line 988 of file flow_graph_opencl_node.h.

988 {}

◆ send_kernel()

template<typename DeviceFilter>
template<typename ... Args>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::send_kernel ( opencl_device  device,
const kernel_type kernel,
const range_type work_size,
Args &...  args 
)
inline

Definition at line 967 of file flow_graph_opencl_node.h.

967  {
968  std::array<cl_event, sizeof...(Args)> events;
969  int num_events = 0;
970  int place = 0;
971  process_arg_list( kernel, events, num_events, place, args... );
972 
973  const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
974 
975  update_arg_list(e, args...);
976 
977  // Release our own reference to cl_event
978  enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
979  }
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
cl_event send_kernel_impl(opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
void update_arg_list(cl_event e, T &t, Rest &... args)
void enforce_cl_retcode(cl_int err, std::string msg)

◆ send_kernel_impl()

template<typename DeviceFilter>
cl_event tbb::flow::interface11::opencl_factory< DeviceFilter >::send_kernel_impl ( opencl_device  device,
const cl_kernel &  kernel,
const range_type work_size,
cl_uint  num_events,
cl_event *  event_list 
)
inlineprivate

Definition at line 992 of file flow_graph_opencl_node.h.

993  {
994  const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
995  const typename range_type::nd_range_type& g_size = work_size.global_range();
996  const typename range_type::nd_range_type& l_size = work_size.local_range();
997  cl_uint s;
998  for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
999  cl_event event;
1001  clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
1002  g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
1003  "Failed to enqueue a kernel" );
1004  return event;
1005  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
std::array< range_index_type, 3 > nd_range_type
void const char const char int ITT_FORMAT __itt_group_sync s
void enforce_cl_retcode(cl_int err, std::string msg)

◆ update_arg_list() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::update_arg_list ( cl_event  e,
T &  t,
Rest &...  args 
)
inlineprivate

Definition at line 958 of file flow_graph_opencl_node.h.

958  {
959  update_one_arg( e, t );
960  update_arg_list( e, args... );
961  }
void update_arg_list(cl_event e, T &t, Rest &... args)

◆ update_arg_list() [2/2]

template<typename DeviceFilter>
void tbb::flow::interface11::opencl_factory< DeviceFilter >::update_arg_list ( cl_event  )
inlineprivate

Definition at line 963 of file flow_graph_opencl_node.h.

963 {}

◆ update_one_arg() [1/2]

template<typename DeviceFilter>
template<typename T >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::update_one_arg ( cl_event  ,
T &   
)
inlineprivate

Definition at line 950 of file flow_graph_opencl_node.h.

950 {}

◆ update_one_arg() [2/2]

template<typename DeviceFilter>
template<typename T , typename F >
void tbb::flow::interface11::opencl_factory< DeviceFilter >::update_one_arg ( cl_event  e,
opencl_async_msg< T, F > &  msg 
)
inlineprivate

Definition at line 953 of file flow_graph_opencl_node.h.

953  {
954  msg.set_event( e );
955  }

Friends And Related Function Documentation

◆ opencl_buffer_impl

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_buffer_impl
friend

Definition at line 1176 of file flow_graph_opencl_node.h.

◆ opencl_memory

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_memory
friend

Definition at line 1178 of file flow_graph_opencl_node.h.

◆ opencl_program

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_program
friend

Definition at line 1174 of file flow_graph_opencl_node.h.

Member Data Documentation

◆ my_cl_context

template<typename DeviceFilter>
cl_context tbb::flow::interface11::opencl_factory< DeviceFilter >::my_cl_context
private

Definition at line 1169 of file flow_graph_opencl_node.h.

◆ my_devices

template<typename DeviceFilter>
opencl_device_list tbb::flow::interface11::opencl_factory< DeviceFilter >::my_devices
private

Definition at line 1168 of file flow_graph_opencl_node.h.

◆ my_devices_mutex

template<typename DeviceFilter>
tbb::spin_mutex tbb::flow::interface11::opencl_factory< DeviceFilter >::my_devices_mutex
private

Definition at line 1171 of file flow_graph_opencl_node.h.

◆ my_once_flag

template<typename DeviceFilter>
std::once_flag tbb::flow::interface11::opencl_factory< DeviceFilter >::my_once_flag
private

Definition at line 1167 of file flow_graph_opencl_node.h.


The documentation for this class was generated from the following file:

Copyright © 2005-2020 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.