29 #ifdef ACL_INTERNAL_TEST_CKW_IN_DF
31 #endif // ACL_INTERNAL_TEST_CKW_IN_DF
37 namespace experimental
39 namespace dynamic_fusion
55 IClKernel::configure_internal(code.
window());
64 #ifndef ACL_INTERNAL_TEST_CKW_IN_DF
// Binds one tensor to the OpenCL kernel's argument list, advancing `idx` past the
// slots consumed. Dispatches on the (not visible here) GpuKernelArgumentInfo type to
// one of the add_*_tensor_argument helpers, optionally materialising a cl::Image2D
// view over the tensor's buffer for texture-path kernels.
// NOTE(review): this chunk is missing interior lines (switch/case labels, braces,
// and the cl::Image2D creation calls) — comments below describe only what the
// visible statements establish; confirm against the full file.
66 inline void ClKernelRuntime::add_tensor_argument(
unsigned int &idx,
// cl_images keeps the created Image2D objects alive until the kernel has been
// enqueued — the kernel argument only holds a non-owning handle.
70 std::vector<cl::Image2D> &cl_images)
// Vector-style argument: bound as a 1D tensor.
84 add_1D_tensor_argument(idx,
tensor, arg_slice);
// Plain 2D image-style argument.
90 add_2D_tensor_argument(idx,
tensor, arg_slice);
// 2D binding followed by an extra cross-plane padding scalar — presumably the
// "2D reinterpreted as 3D" case; TODO confirm the case label in the full file.
95 add_2D_tensor_argument(idx,
tensor, arg_slice);
// Total top+bottom padding lets the kernel skip padded rows between planes.
96 const unsigned int total_cross_plane_pad =
tensor->info()->padding().top +
tensor->info()->padding().bottom;
97 _kernel.setArg<cl_uint>(idx++,
static_cast<unsigned int>(total_cross_plane_pad));
// Texture path: collapse dims 1..3 into the image height; width is dim0 / 4
// (4 channels per texel — assumes dim0 is a multiple of 4; TODO confirm a
// validation exists upstream).
102 const TensorShape shape2d(
tensor->info()->dimension(0) / 4,
tensor->info()->dimension(1) *
103 tensor->info()->dimension(2) *
104 tensor->info()->dimension(3));
// Row pitch taken from the byte stride of dimension 1.
105 const size_t image_row_pitch =
tensor->info()->strides_in_bytes()[1];
// NOTE(review): the initializer (image creation call) is missing from this chunk.
106 cl::Image2D tensor_image2d =
// Keep the image alive past enqueue, then bind it.
109 cl_images.push_back(tensor_image2d);
110 _kernel.setArg(idx++, tensor_image2d);
// 2D binding plus the dim-2 byte stride as an extra scalar argument.
116 add_2D_tensor_argument(idx,
tensor, arg_slice);
117 _kernel.setArg<cl_uint>(idx++,
static_cast<unsigned int>(
tensor->info()->strides_in_bytes()[2]));
// Same texture-view construction as above, but this variant also passes the
// dim-2 stride (see setArg below) — presumably the 3D-as-image case.
122 const TensorShape shape2d(
tensor->info()->dimension(0) / 4,
tensor->info()->dimension(1) *
123 tensor->info()->dimension(2) *
124 tensor->info()->dimension(3));
125 const size_t image_row_pitch =
tensor->info()->strides_in_bytes()[1];
// NOTE(review): initializer missing from this chunk as well.
126 cl::Image2D tensor_image2d =
129 cl_images.push_back(tensor_image2d);
130 _kernel.setArg(idx++, tensor_image2d);
131 _kernel.setArg<cl_uint>(idx++,
static_cast<unsigned int>(
tensor->info()->strides_in_bytes()[2]));
// Straightforward 3D / 4D / NHWC bindings.
137 add_3D_tensor_argument(idx,
tensor, arg_slice);
143 add_4D_tensor_argument(idx,
tensor, arg_slice);
148 add_4d_tensor_nhwc_argument(idx,
tensor);
// NHWC texture path: width = dim0 / 4 texels, height = product of all upper
// dimensions, stride from dim 1.
153 const size_t image_w =
tensor->info()->dimension(0) / 4;
154 const size_t image_h =
tensor->info()->tensor_shape().total_size_upper(1);
155 const size_t image_stride_y =
tensor->info()->strides_in_bytes()[1];
// (Image creation line not visible in this chunk.)
160 cl_images.push_back(tensor_image2d);
162 _kernel.setArg(idx++, tensor_image2d);
// Image view is passed alongside the regular NHWC buffer arguments.
163 add_4d_tensor_nhwc_argument(idx,
tensor);
// Raw-buffer style binding: cl_buffer pointer + dim1*dim2 + stride1 scalars.
// NOTE(review): `info` presumably aliases tensor->info(); the declaring line is
// missing from this chunk — verify.
169 const Strides &strides =
info->strides_in_bytes();
171 _kernel.setArg(idx++,
tensor->cl_buffer());
172 const size_t dim1xdim2 =
info->tensor_shape()[1] *
info->tensor_shape()[2];
// Narrowing size_t -> int32_t: assumes shapes fit in 31 bits — TODO confirm.
173 _kernel.setArg<cl_int>(idx++,
static_cast<int32_t
>(dim1xdim2));
174 const size_t stride1 = strides[1];
175 _kernel.setArg<cl_int>(idx++,
static_cast<int32_t
>(stride1));
185 #else // ACL_INTERNAL_TEST_CKW_IN_DF
// CKW (Compute Kernel Writer) variant of the argument-binding helper: binds a
// single GpuKernelArgumentBinding to the kernel, advancing `idx`.
// NOTE(review): most of this function's body (outer switch, per-case statements,
// break/default handling) is missing from this chunk — comments are limited to
// what the surviving lines show.
186 inline void ClKernelRuntime::add_kernel_argument(
unsigned int &idx,
187 const GpuKernelArgumentBinding &arg,
// Created cl::Image2D objects are appended here so they outlive the enqueue.
189 std::vector<cl::Image2D> &cl_images)
// Storage-style bindings dispatch again on the concrete storage type.
193 case GpuKernelArgumentBinding::Type::TensorStorage:
195 switch (arg.tensor_storage_type())
// Plain uint8 buffer pointer.
197 case TensorStorageType::ClBufferUint8Ptr:
// Read-only and write-only image views both retain the image in cl_images;
// the creation/setArg lines between the cases are not visible here.
202 case TensorStorageType::ClImage2dReadOnly:
205 cl_images.push_back(tensor_image2d);
209 case TensorStorageType::ClImage2dWriteOnly:
212 cl_images.push_back(tensor_image2d);
// Scalar tensor components (dims, strides, offsets) — body not visible.
224 case GpuKernelArgumentBinding::Type::TensorComponent:
237 #endif // ACL_INTERNAL_TEST_CKW_IN_DF
// Interior of the kernel-execution method (signature and enclosing slice loop
// are outside this chunk). Binds every tensor argument for the current slice,
// then enqueues the kernel.
247 constexpr
bool skip_sliding_window =
false;
248 constexpr
bool use_dummy_work_items =
false;
// Running kernel-argument slot index, shared across all bindings below.
250 unsigned int idx = 0;
// Owns any cl::Image2D views created while binding, so they stay valid until
// enqueue() has been issued for this slice.
255 std::vector<cl::Image2D> cl_images;
256 #ifndef ACL_INTERNAL_TEST_CKW_IN_DF
// Legacy path: _arguments maps tensor ids to argument descriptors.
257 for (
auto id_arg : _arguments)
259 const auto arg = id_arg.second;
// Resolve the runtime tensor backing this argument from the tensor pack.
260 auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.
get_tensor(id_arg.first));
// NOTE(review): an ARM_COMPUTE_ERROR_ON_NULLPTR check presumably sits on the
// missing line between these — verify against the full file.
263 add_tensor_argument(idx, *arg.kernel_argument_info(),
tensor,
slice, cl_images);
266 #else // ACL_INTERNAL_TEST_CKW_IN_DF
// CKW path: _arguments is a flat list of bindings carrying their own ids.
267 for (
const auto &arg : _arguments)
269 auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.
get_tensor(arg.id()));
272 add_kernel_argument(idx, arg,
tensor, cl_images);
274 #endif // ACL_INTERNAL_TEST_CKW_IN_DF
// Dispatch the kernel for this slice with the configured local work size.
277 enqueue(queue, *
this,
slice, lws_hint(), use_dummy_work_items);