Compute Library
 21.08
ClMulKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
31 #include "src/core/CL/CLValidate.h"
34 #include "support/Cast.h"
35 #include "support/StringSupport.h"
36 
37 namespace arm_compute
38 {
39 namespace opencl
40 {
41 namespace kernels
42 {
43 namespace
44 {
/** Check that the given tensor infos and scale describe a valid pixel-wise multiplication.
 *
 * @param[in] src1            First source tensor info. Must not be nullptr.
 * @param[in] src2            Second source tensor info. Must not be nullptr.
 * @param[in] dst             Destination tensor info. Must not be nullptr. Its data type and shape are only
 *                            checked against the sources when its total size is non-zero.
 * @param[in] scale           Scale factor. Negative values are rejected.
 * @param[in] overflow_policy Not inspected by this function (marked unused).
 * @param[in] rounding_policy Not inspected by this function (marked unused).
 * @param[in] act_info        Activation info. An enabled activation is rejected unless dst has a floating point data type.
 *
 * @return A default-constructed Status when every check passes, otherwise the error produced by the failing check.
 */
45 Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale,
46  ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
47 {
48  ARM_COMPUTE_UNUSED(overflow_policy);
49  ARM_COMPUTE_UNUSED(rounding_policy);
50 
51  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst);
 // NOTE(review): original lines 52-62 are only partially present in this listing; the two `1,`
 // fragments below are what remains of the statements they belonged to, so those checks cannot be read from here.
54  1,
59  1,
63  ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale < 0, "Scale cannot be negative.");
64  ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled() && !is_data_type_float(dst->data_type()));
65 
66  // Check whether it is in_place calculation
 // in_place: dst is the same ITensorInfo object as one of the sources; src1_in_place tells which one.
67  const bool in_place = (src1 == dst) || (src2 == dst);
68  const bool src1_in_place = in_place && (src1 == dst);
69 
70  const TensorShape &out_shape = TensorShape::broadcast_shape(src1->tensor_shape(), src2->tensor_shape());
71 
 // A broadcast shape with zero total size is treated as "the two source shapes cannot be broadcast together".
72  ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
73 
74  // Validate in case of configured dst
75  if(dst->total_size() > 0)
76  {
 // NOTE(review): original lines 77-81 are missing from this listing apart from the `1,` fragment below.
78  1,
 // Data type compatibility: U8 / QASYMM8 / QASYMM8_SIGNED / QSYMM16 destinations require both sources to have that same type.
82  ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst->data_type() == DataType::U8 && (src1->data_type() != DataType::U8 || src2->data_type() != DataType::U8),
83  "Dst can only be U8 if both src are U8");
84  ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst->data_type() == DataType::QASYMM8 && (src1->data_type() != DataType::QASYMM8 || src2->data_type() != DataType::QASYMM8),
85  "Dst can only be QASYMM8 if both src are QASYMM8");
86  ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst->data_type() == DataType::QASYMM8_SIGNED && (src1->data_type() != DataType::QASYMM8_SIGNED || src2->data_type() != DataType::QASYMM8_SIGNED),
87  "Dst can only be QASYMM8_SIGNED if both src are QASYMM8_SIGNED");
88  ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst->data_type() == DataType::QSYMM16 && (src1->data_type() != DataType::QSYMM16 || src2->data_type() != DataType::QSYMM16),
89  "Dst can only be QSYMM16 if both src are QSYMM16");
 // If either source is S32, the destination has to be S32 as well.
90  ARM_COMPUTE_RETURN_ERROR_ON_MSG((src1->data_type() == DataType::S32 || src2->data_type() == DataType::S32) && (dst->data_type() != DataType::S32),
91  "Dst must be S32 if source tensors are S32");
92  if(in_place)
93  {
 // In-place: the source that doubles as dst must already have the full broadcast shape.
94  ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, src1_in_place ? src1->tensor_shape() : src2->tensor_shape(), 0),
95  "Wrong shape for dst, cannot do in_place calculation");
96  }
97  else
98  {
 // Out-of-place: a configured dst must match the broadcast shape in every dimension (comparison starts at dimension 0).
99  ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0),
100  "Wrong shape for dst");
101  }
102  }
103 
104  return Status{};
105 }
106 } // namespace
107 
109 {
111 }
112 
/** Build the OpenCL pixel-wise multiplication kernel for the given tensor infos and set its scale argument.
 *
 * The arguments are first run through validate_arguments(); a failing status is passed to ARM_COMPUTE_ERROR_THROW_ON.
 * The kernel name starts as "pixelwise_mul" and gets the suffix "_quantized" when src1 is quantized and dst is not S32,
 * otherwise "_int" (scale is 1/2^n with 0 <= n <= 15 and the int path was not disabled) or "_float".
 *
 * @param[in]     compile_context Compile context used to create the kernel.
 * @param[in]     src1            First source tensor info.
 * @param[in]     src2            Second source tensor info.
 * @param[in,out] dst             Destination tensor info. Auto-initialised from a clone of src1 with the broadcast shape if empty.
 * @param[in]     scale           Scale factor passed to the kernel (as an integer exponent on the "_int" path).
 * @param[in]     overflow_policy Selects -DWRAP or -DSATURATE on the non-quantized path (-DWRAP is also used when dst is floating point).
 * @param[in]     rounding_policy Selects -DROUND=_rtz (TO_ZERO) or -DROUND=_rte (any other policy) on the non-quantized path.
 * @param[in]     act_info        Activation info; its function and a/b values become build options on the non-quantized path when enabled.
 */
113 void ClMulKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale,
114  ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
115 {
116  ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst);
117  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src1, src2, dst,
118  scale, overflow_policy, rounding_policy, act_info));
119 
 // Snapshot of the tensors' padding taken before configuration.
 // NOTE(review): no visible line reads padding_info; original line 234 is missing from this listing — presumably the padding check lives there, confirm.
120  auto padding_info = get_padding_info({ src1, src2, dst });
121 
122  const TensorShape &out_shape = TensorShape::broadcast_shape(src1->tensor_shape(), src2->tensor_shape());
123  auto_init_if_empty(*dst, src1->clone()->set_tensor_shape(out_shape));
124 
 // scale_int stays -1 unless the scale qualifies for the integer path below.
125  int scale_int = -1;
126  // Extract sign, exponent and mantissa
127  int exponent = 0;
128  float normalized_mantissa = std::frexp(scale, &exponent);
129  // Use int scaling if factor is equal to 1/2^n for 0 <= n <= 15
130  // frexp returns 0.5 as mantissa for such factors, so the accepted exponent range is -14 <= e <= 1
131  // (e = 1 - n: e == 1 for a factor of 1, e == -14 for a factor of 1/2^15)
132  if((normalized_mantissa == 0.5f) && (-14 <= exponent) && (exponent <= 1))
133  {
134  // Store the positive exponent. We know that we compute 1/2^n
135  // Additionally we need to subtract 1 to compensate that frexp used a mantissa of 0.5
136  scale_int = std::abs(exponent - 1);
137  }
138 
139  std::string acc_type;
140  // Check if it has float src and dst
 // NOTE(review): the `if(...)` condition (original line 141) is missing from this listing; only the comment above describes it.
142  {
 // This branch disables the integer scale path and accumulates in float if either source is F32, otherwise in half.
143  scale_int = -1;
144  acc_type = (src1->data_type() == DataType::F32 || src2->data_type() == DataType::F32) ? "float" : "half";
145  }
146  else
147  {
 // Otherwise the accumulator type is picked from the widest source element size.
148  if(src1->element_size() == 4 || src2->element_size() == 4)
149  {
150  // use 64 bit accumulator for 32-bit input
151  acc_type = "long";
152  }
153  else if(src1->element_size() == 2 || src2->element_size() == 2)
154  {
155  // Use 32-bit accumulator for 16-bit input
156  acc_type = "int";
157  }
158  else
159  {
160  // Use 16-bit accumulator for 8-bit input
161  acc_type = "ushort";
162  }
163  }
164 
 // Only src1's data type decides whether the quantized path is considered.
165  const bool is_quantized = is_data_type_quantized(src1->data_type());
 // Start from 16 bytes worth of dst elements, adjusted by adjust_vec_size() for dst's first dimension;
 // the leftover is the remainder of that dimension modulo the vector size.
166  const unsigned int vec_size = adjust_vec_size(16 / dst->element_size(), dst->dimension(0));
167  const unsigned int vec_size_leftover = dst->dimension(0) % vec_size;
168 
169  // Set kernel build options
170  std::string kernel_name = "pixelwise_mul";
171  CLBuildOptions build_opts;
172  build_opts.add_option("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(src1->data_type()));
173  build_opts.add_option("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(src2->data_type()));
174  build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(dst->data_type()));
 // A source whose first dimension is 1 while dst's is not gets a vector size of 1.
175  build_opts.add_option("-DVEC_SIZE_IN1=" + ((dst->dimension(0) != 1 && src1->dimension(0) == 1) ? "1" : support::cpp11::to_string(vec_size)));
176  build_opts.add_option("-DVEC_SIZE_IN2=" + ((dst->dimension(0) != 1 && src2->dimension(0) == 1) ? "1" : support::cpp11::to_string(vec_size)));
177  build_opts.add_option("-DVEC_SIZE_OUT=" + support::cpp11::to_string(vec_size));
178  build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover));
179  if(is_quantized && (dst->data_type() != DataType::S32))
180  {
181  const UniformQuantizationInfo iq1_info = src1->quantization_info().uniform();
182  const UniformQuantizationInfo iq2_info = src2->quantization_info().uniform();
183  const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
184 
 // NOTE(review): original lines 185, 187 and 189 (the calls these three argument lines belong to) are missing from this listing.
186  "-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset));
188  "-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset));
190  "-DOFFSET_OUT=" + support::cpp11::to_string(oq_info.offset));
191  build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
192  build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
193  build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
194  kernel_name += "_quantized";
195  }
196  else
197  {
198  kernel_name += (scale_int >= 0) ? "_int" : "_float";
199  build_opts.add_option_if_else(overflow_policy == ConvertPolicy::WRAP || is_data_type_float(dst->data_type()), "-DWRAP", "-DSATURATE");
200  build_opts.add_option_if_else(rounding_policy == RoundingPolicy::TO_ZERO, "-DROUND=_rtz", "-DROUND=_rte");
201  build_opts.add_option("-DACC_DATA_TYPE=" + acc_type);
202  if(act_info.enabled())
203  {
204  build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
205  build_opts.add_option("-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
206  build_opts.add_option("-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
207  }
208  }
209 
210  // Check whether it is in_place calculation
211  const bool in_place = (src1 == dst) || (src2 == dst);
212  const bool src1_in_place = in_place && (src1 == dst);
213  build_opts.add_option_if(in_place, "-DIN_PLACE");
214  build_opts.add_option_if(src1_in_place, "-DSRC1_IN_PLACE");
215 
216  // Create kernel
217  _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
218 
219  // Set scale argument
 // The scale is placed right after the tensor arguments: two tensors when running in place, three otherwise.
220  unsigned int idx = (in_place ? 2 : 3) * num_arguments_per_3D_tensor(); // Skip the src and dst parameters
221 
 // NOTE(review): when src1 is quantized and dst is S32, kernel_name above can be the "_int" variant
 // while this condition passes the float scale — confirm that combination is rejected earlier or intended.
222  if(scale_int >= 0 && !is_quantized)
223  {
224  _kernel.setArg(idx++, scale_int);
225  }
226  else
227  {
228  _kernel.setArg(idx++, scale);
229  }
230 
 // The execution window spans dst and steps by vec_size.
231  Window win = calculate_max_window(*dst, Steps(vec_size));
232  ICLKernel::configure_internal(win);
233 
235 
236  // Set config_id for enabling LWS tuning
 // Format: <kernel_name>_<dst data type>_<src1 dims 0..2>_<src2 dims 0..2>_<dst dims 0..2>
237  _config_id = kernel_name;
238  _config_id += "_";
239  _config_id += lower_string(string_from_data_type(dst->data_type()));
240  _config_id += "_";
241  _config_id += support::cpp11::to_string(src1->dimension(0));
242  _config_id += "_";
243  _config_id += support::cpp11::to_string(src1->dimension(1));
244  _config_id += "_";
245  _config_id += support::cpp11::to_string(src1->dimension(2));
246  _config_id += "_";
247  _config_id += support::cpp11::to_string(src2->dimension(0));
248  _config_id += "_";
249  _config_id += support::cpp11::to_string(src2->dimension(1));
250  _config_id += "_";
251  _config_id += support::cpp11::to_string(src2->dimension(2));
252  _config_id += "_";
253  _config_id += support::cpp11::to_string(dst->dimension(0));
254  _config_id += "_";
255  _config_id += support::cpp11::to_string(dst->dimension(1));
256  _config_id += "_";
257  _config_id += support::cpp11::to_string(dst->dimension(2));
258 }
259 
/** Static check of whether the given infos would lead to a valid ClMulKernel configuration.
 *
 * Forwards all arguments unchanged to validate_arguments() after a nullptr check on the three tensor infos.
 *
 * @return The error reported by validate_arguments(), or a default-constructed Status if there is none.
 */
260 Status ClMulKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale,
261  ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
262 {
 // NOTE(review): this uses the ERROR_ON (not RETURN_ERROR_ON) nullptr macro; validate_arguments() performs the returning check itself.
263  ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst);
264  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src1, src2, dst, scale, overflow_policy, rounding_policy, act_info));
265 
266  return Status{};
267 }
268 
/** Enqueue the configured multiplication kernel, one 3D slice at a time, over the given window.
 *
 * @param[in]     tensors Pack providing the tensors under ACL_SRC_0, ACL_SRC_1 and ACL_DST.
 * @param[in]     window  Region to execute; dimensions from Z upwards are collapsed when the input shapes allow it.
 * @param[in,out] queue   Command queue the kernel is enqueued on.
 */
269 void ClMulKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
270 {
 // NOTE(review): original lines 271-272 are missing from this listing.
273 
274  const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
275  const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1));
276  auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
277 
278  ARM_COMPUTE_ERROR_ON_NULLPTR(src_0, src_1, dst);
279 
280  const TensorShape &in_shape1 = src_0->info()->tensor_shape();
281  const TensorShape &in_shape2 = src_1->info()->tensor_shape();
282  const TensorShape &out_shape = dst->info()->tensor_shape();
283 
 // Collapsing is always allowed when the smaller input has at most one element. Otherwise both inputs
 // must have more than DimZ dimensions and agree on every dimension from DimZ up to dst's dimensionality.
284  bool can_collapse = true;
285  if(std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
286  {
287  can_collapse = (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
288  for(size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); ++d)
289  {
290  can_collapse = (in_shape1[d] == in_shape2[d]);
291  }
292  }
293 
 // has_collapsed reports whether collapse_if_possible() actually collapsed; it stays false when no collapse is attempted.
294  bool has_collapsed = false;
295  Window collapsed = can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) : window;
296 
 // Collapse the input shapes in the same way so the broadcast decision below is made on matching shapes.
297  const TensorShape &in_shape1_collapsed = has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
298  const TensorShape &in_shape2_collapsed = has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
299 
 // Per-input slices do not advance along dimensions where that input's size is <= 1.
300  Window slice = collapsed.first_slice_window_3D();
301  Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
302  Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
303 
304  // Check whether it is in_place calculation
305  const bool in_place = (src_0 == dst) || (src_1 == dst);
306  do
307  {
308  unsigned int idx = 0;
309  add_3D_tensor_argument(idx, src_0, slice_input1);
310  add_3D_tensor_argument(idx, src_1, slice_input2);
 // dst is only passed as a separate kernel argument when it is not one of the sources.
311  if(!in_place)
312  {
313  add_3D_tensor_argument(idx, dst, slice);
314  }
315  enqueue(queue, *this, slice, lws_hint());
316 
 // Slide the input slices as well; their return values are deliberately ignored, the loop is driven by the output slice.
317  ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input1));
318  ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input2));
319  }
320  while(collapsed.slide_window_slice_3D(slice));
321 }
322 
323 namespace
324 {
// Window step used when configuring the complex multiplication kernel (one element per work-item step).
325 constexpr unsigned int vec_size_complex = 1;
326 
/** Check that the given tensor infos describe a valid complex multiplication.
 *
 * Only the broadcast-compatibility and dst-shape checks are visible in this listing.
 *
 * @param[in] src1     First source tensor info.
 * @param[in] src2     Second source tensor info.
 * @param[in] dst      Destination tensor info; its shape is checked only when its total size is non-zero.
 * @param[in] act_info Activation info. NOTE(review): not referenced by any visible line — the use may sit in the missing lines.
 *
 * @return A default-constructed Status when every check passes, otherwise the error produced by the failing check.
 */
327 Status validate_arguments_complex(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
328 {
 // NOTE(review): original lines 329-331 are missing from this listing.
332 
333  const TensorShape &out_shape = TensorShape::broadcast_shape(src1->tensor_shape(), src2->tensor_shape());
334 
 // A broadcast shape with zero total size is reported as incompatible inputs.
335  ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
 // NOTE(review): original line 336 is missing from this listing.
337 
338  // Validate in case of configured dst
339  if(dst->total_size() > 0)
340  {
 // NOTE(review): original lines 341-342 are missing from this listing.
343  ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0), "Wrong shape for dst");
344  }
345 
346  return Status{};
347 }
348 } // namespace
349 
351 {
353 }
354 
/** Build the "pixelwise_mul_complex" OpenCL kernel for the given tensor infos.
 *
 * The arguments are first run through validate_arguments_complex(); a failing status is passed to ARM_COMPUTE_ERROR_THROW_ON.
 *
 * @param[in]     compile_context Compile context used to create the kernel.
 * @param[in]     src1            First source tensor info.
 * @param[in]     src2            Second source tensor info.
 * @param[in,out] dst             Destination tensor info. Auto-initialised from a clone of src1 with the broadcast shape if empty.
 * @param[in]     act_info        Activation info; its function and a/b values become build options when enabled.
 */
355 void ClComplexMulKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
356 {
357  ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst);
358  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_complex(src1, src2, dst, act_info));
359 
 // Snapshot of the tensors' padding taken before configuration.
 // NOTE(review): no visible line reads padding_info; original line 380 is missing from this listing — presumably the padding check lives there, confirm.
360  auto padding_info = get_padding_info({ src1, src2, dst });
361 
362  const TensorShape &out_shape = TensorShape::broadcast_shape(src1->tensor_shape(), src2->tensor_shape());
363  auto_init_if_empty(*dst, src1->clone()->set_tensor_shape(out_shape));
364 
 // A single data type option, taken from dst, is passed to the kernel.
365  CLBuildOptions build_opts;
366  build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dst->data_type()));
367  if(act_info.enabled())
368  {
369  build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
370  build_opts.add_option("-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
371  build_opts.add_option("-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
372  }
373 
374  // Create kernel
375  _kernel = create_kernel(compile_context, "pixelwise_mul_complex", build_opts.options());
376 
 // The execution window spans dst and steps by vec_size_complex.
377  Window win = calculate_max_window(*dst, Steps(vec_size_complex));
378  ICLKernel::configure_internal(win);
379 
381 }
382 
/** Static check of whether the given infos would lead to a valid ClComplexMulKernel configuration.
 *
 * Forwards all arguments unchanged to validate_arguments_complex() after a nullptr check on the three tensor infos.
 *
 * @return The error reported by validate_arguments_complex(), or a default-constructed Status if there is none.
 */
383 Status ClComplexMulKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
384 {
385  ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst);
386  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_complex(src1, src2, dst, act_info));
387 
388  return Status{};
389 }
390 
/** Enqueue the configured complex multiplication kernel, one 3D slice at a time, over the given window.
 *
 * @param[in]     tensors Pack providing the tensors under ACL_SRC_0, ACL_SRC_1 and ACL_DST.
 * @param[in]     window  Region to execute; dimensions from Z upwards are collapsed when the input shapes allow it.
 * @param[in,out] queue   Command queue the kernel is enqueued on.
 */
391 void ClComplexMulKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
392 {
 // NOTE(review): original lines 393-394 are missing from this listing.
395 
 // NOTE(review): unlike ClMulKernel::run_op, no nullptr check on the downcast tensors is visible here.
396  const auto src_0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
397  const auto src_1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1));
398  auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
399 
400  const TensorShape &in_shape1 = src_0->info()->tensor_shape();
401  const TensorShape &in_shape2 = src_1->info()->tensor_shape();
402  const TensorShape &out_shape = dst->info()->tensor_shape();
403 
 // Collapsing is always allowed when the smaller input has at most one element. Otherwise both inputs
 // must have more than DimZ dimensions and agree on every dimension from DimZ up to dst's dimensionality.
404  bool can_collapse = true;
405  if(std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
406  {
407  can_collapse = (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
408  for(size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); ++d)
409  {
410  can_collapse = (in_shape1[d] == in_shape2[d]);
411  }
412  }
413 
 // has_collapsed reports whether collapse_if_possible() actually collapsed; it stays false when no collapse is attempted.
414  bool has_collapsed = false;
415  Window collapsed = can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) : window;
416 
 // Collapse the input shapes in the same way so the broadcast decision below is made on matching shapes.
417  const TensorShape &in_shape1_collapsed = has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
418  const TensorShape &in_shape2_collapsed = has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
419 
 // Per-input slices do not advance along dimensions where that input's size is <= 1.
420  Window slice = collapsed.first_slice_window_3D();
421  Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
422  Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
423 
424  do
425  {
 // Kernel arguments are always src_0, src_1, dst in that order; there is no in-place variant here.
426  unsigned int idx = 0;
427  add_3D_tensor_argument(idx, src_0, slice_input1);
428  add_3D_tensor_argument(idx, src_1, slice_input2);
429  add_3D_tensor_argument(idx, dst, slice);
430  enqueue(queue, *this, slice, lws_hint());
431 
 // Slide the input slices as well; their return values are deliberately ignored, the loop is driven by the output slice.
432  ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input1));
433  ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input2));
434  }
435  while(collapsed.slide_window_slice_3D(slice));
436 }
437 } // namespace kernels
438 } // namespace opencl
439 } // namespace arm_compute
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Definition: Utils.h:981
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
Definition: CLValidate.h:35
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
Shape of a tensor.
Definition: TensorShape.h:39
void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's src and dst.
quantized, symmetric fixed-point 16-bit number
static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration.
bool enabled() const
Check if initialised.
Definition: Types.h:1525
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
Definition: ICLKernel.cpp:32
const StringSet & options() const
Gets the current options list set.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:318
TensorShape collapsed_from(size_t start) const
Return a copy with collapsed dimensions starting from a given point.
Definition: TensorShape.h:161
1 channel, 1 U8 per channel
float a() const
Get the alpha value.
Definition: Types.h:1515
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
static TensorShape broadcast_shape(const Shapes &... shapes)
If shapes are broadcast compatible, return the broadcasted shape.
Definition: TensorShape.h:211
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
const std::string & string_from_activation_func(ActivationLayerInfo::ActivationFunction act)
Translates a given activation function to a string.
Definition: Utils.cpp:163
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Quantization info when assuming per layer quantization.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
Status class.
Definition: Error.h:52
std::string lower_string(const std::string &val)
Lower a given string.
Definition: Utils.cpp:326
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
Activation Layer Information class.
Definition: Types.h:1475
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: ICLKernel.h:214
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
1 channel, 1 S32 per channel
void add_option(std::string option)
Adds option to the existing build option list.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:54
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:391
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Definition: Utils.cpp:135
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
Definition: ICLKernel.h:256
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
Definition: Window.inl:68
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
Definition: Utils.h:1075
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
size_t total_size() const
Collapses all dimensions to a single linear total size.
Definition: TensorShape.h:172
UniformQuantizationInfo uniform() const
Return per layer quantization info.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:39
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
RoundingPolicy
Rounding method.
Definition: Rounding.h:30
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
bool have_different_dimensions(const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim)
Definition: Validate.h:47
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
Window broadcast_if_dimension_le_one(const TensorShape &shape) const
Don't advance in the dimension where shape is less than or equal to 1.
Definition: Window.inl:120
Elementwise CL kernel type.
Definition: CLTypes.h:84
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
Definition: Window.h:335
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:915
1 channel, 1 S16 per channel
bool has_padding_changed(const std::unordered_map< const ITensorInfo *, PaddingSize > &padding_map)
Check if the previously stored padding info has changed after configuring a kernel.
Definition: Utils.cpp:533
CLCompileContext class.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1003
static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration.
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:64
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
unsigned int num_dimensions() const
Returns the effective dimensionality of the tensor.
Definition: Dimensions.h:143
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:541
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:788
std::unordered_map< const ITensorInfo *, PaddingSize > get_padding_info(std::initializer_list< const ITensorInfo *> infos)
Stores padding information before configuring a kernel.
Definition: Utils.cpp:518
void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's src and dst.
Wrapper to configure the Khronos OpenCL C++ header.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
Tensor packing service.
Definition: ITensorPack.h:39
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
Definition: Utils.h:1171
ActivationFunction activation() const
Get the type of activation function.
Definition: Types.h:1510
float b() const
Get the beta value.
Definition: Types.h:1520
quantized, asymmetric fixed-point 8-bit number signed
Window first_slice_window_3D() const
First 3D slice of the window.
Definition: Window.h:291
std::string kernel_name
Truncates the least significant values that are lost in operations.
Describe a multidimensional execution window.
Definition: Window.h:39
ConvertPolicy
Policy to handle overflow.
Definition: Types.h:382
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
Definition: Utils.h:961
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:201
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
void add_option_if_else(bool cond, std::string option_true, std::string option_false)
Adds first option if condition is true else the second one.