Compute Library
 23.08
ClElementwiseUnaryKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2021, 2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
26 #include "arm_compute/core/Utils.h"
31 #include "src/core/CL/CLValidate.h"
33 #include "support/Cast.h"
34 #include "support/StringSupport.h"
35 
36 namespace arm_compute
37 {
38 namespace opencl
39 {
40 namespace kernels
41 {
42 namespace
43 {
44 constexpr unsigned int vector_size_byte_opencl = 16;
45 
// Validates that the `src` / `dst` tensor infos are acceptable for the unary operation `op`.
// The checks are dispatched on `op` through the if / else-if chain below; when control reaches
// the end of the function a default-constructed Status is returned.
// NOTE(review): this text is an extract of a generated documentation page. Listing lines 48-49,
// 51, 55, 59, 63 and 69-71 are absent, so the first branch condition and every concrete check
// statement are not visible here - confirm the details against the original source file.
46 Status validate_arguments(const ITensorInfo &src, const ITensorInfo &dst, const ElementWiseUnary op)
47 {
    // First branch of the dispatch; its condition (listing lines 48-49) is not visible in this extract.
50  {
52  }
    // Branch selected for the negate operation; its check (listing line 55) is not visible.
53  else if(op == ElementWiseUnary::NEG)
54  {
56  }
57  else if(op == ElementWiseUnary::RSQRT) // Allow quantized types for only RSQRT.
58  {
60  }
    // Fallback branch for every operation not matched above; its check (listing line 63) is not visible.
61  else
62  {
64  }
65 
66  // Validate in case of configured dst
    // A dst whose total_size() is 0 skips this block entirely.
    // NOTE(review): the body (listing lines 69-71) is missing; presumably it compares dst
    // against src - TODO confirm.
67  if(dst.total_size() > 0)
68  {
72  }
73 
74  return Status{};
75 }
76 } // namespace
77 
79 {
    // Body of the ClElementWiseUnaryKernel default constructor (the cross-reference index on this
    // page places that definition at listing line 78).
    // NOTE(review): both the constructor header and its only statement (listing line 80) are
    // missing from this extract; see the original source for what is initialised here.
81 }
82 
84 {
    // Body of ClElementWiseUnaryKernel::configure(compile_context, src, dst, op): selects the
    // OpenCL kernel name, assembles its build options, creates the kernel and sets the
    // execution window.
    // NOTE(review): this is an extract of a generated documentation page. The function header
    // (listing line 83) and listing lines 85, 89, 115, 118, 121, 124, 127, 130, 133, 136, 147
    // and 152 are absent, so any argument checking, the `case` labels, the declaration of `win`
    // and the final statement are not visible here.
86 
    // Padding of src/dst captured before configuration. Its later use is not visible in this
    // extract (presumably checked on the missing listing line 152 - TODO confirm).
87  auto padding_info = get_padding_info({ src, dst });
88 
    // Elements handled per work-item: as many dst elements as fit in vector_size_byte_opencl
    // bytes, then adjusted against dst's first dimension by adjust_vec_size().
90  const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst->element_size(), dst->dimension(0));
91 
92  std::string kernel_name = "elementwise_unary";
93  const int vec_size_x = num_elems_processed_per_iteration;
94  const int dst_width_x = dst->dimension(0);
    // Quantized sources use the "_quantized" variant of the kernel.
95  if(is_data_type_quantized(src->data_type()))
96  {
97  kernel_name += "_quantized";
98  }
99  // Set kernel build options
100  CLBuildOptions build_opts;
101  build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
102  build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
    // LAST_ACCESSED_X is dst width minus one vector, clamped so it never goes below 0 when the
    // tensor is narrower than the vector size.
103  build_opts.add_option("-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(dst_width_x - vec_size_x, 0)));
    // Quantized sources additionally pass the uniform quantization offset and scale of both
    // src (…_IN) and dst (…_OUT) as compile-time defines.
104  if(is_data_type_quantized(src->data_type()))
105  {
106  const UniformQuantizationInfo iqinfo = src->quantization_info().uniform();
107  const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform();
108  build_opts.add_option("-DOFFSET_IN=" + support::cpp11::to_string(iqinfo.offset));
109  build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oqinfo.offset));
110  build_opts.add_option("-DSCALE_IN=" + float_to_string_with_full_precision(iqinfo.scale));
111  build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale));
112  }
    // Select the operation through the OPERATION define.
    // NOTE(review): the `case` labels are missing from this extract; only the statement that
    // follows each label is shown, so which enumerator maps to which define must be confirmed
    // in the original source.
113  switch(op)
114  {
116  build_opts.add_option("-DOPERATION=rsqrt_op");
117  break;
119  build_opts.add_option("-DOPERATION=exp_op");
120  break;
122  build_opts.add_option("-DOPERATION=neg_op");
123  break;
125  build_opts.add_option("-DOPERATION=sin_op");
126  break;
128  build_opts.add_option("-DOPERATION=fabs_op");
129  break;
131  build_opts.add_option("-DOPERATION=natural_log_op");
132  break;
134  build_opts.add_option("-DOPERATION=round_op");
135  break;
137  build_opts.add_option("-DOPERATION=logical_not_op");
138  break;
    // Any operation without a branch above is reported as an error.
139  default:
140  ARM_COMPUTE_ERROR("Not implemented");
141  }
142 
143  // Create kernel
144  _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
145 
146  // Configure kernel window
    // NOTE(review): the declaration of `win` (listing line 147) is missing from this extract.
    // The X dimension keeps its start, has its end rounded up to a multiple of vec_size_x and
    // uses vec_size_x as the step.
148  win.set(Window::DimX, Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
149 
150  ICLKernel::configure_internal(win);
151 
153 }
154 
156 {
    // Body of the static ClElementWiseUnaryKernel::validate(src, dst, op) (the cross-reference
    // index on this page places that definition at listing line 155).
    // NOTE(review): the function header and listing lines 158-159 are missing from this extract,
    // so the actual checks are not visible; presumably they delegate to validate_arguments() -
    // TODO confirm against the original source.
157  ARM_COMPUTE_UNUSED(op);
160 
    // Reached only if nothing above returned early: hand back a default-constructed Status.
161  return Status{};
162 }
163 
// Enqueues the OpenCL kernel on `queue`, once per 3D slice of the execution window.
// The source and destination tensors are taken from `tensors` under the ACL_SRC / ACL_DST ids.
// NOTE(review): this is an extract of a generated documentation page. Listing lines 166-167,
// 169 and 178-179 are absent, so any precondition checks, the declaration of `collapsed` and
// the kernel-argument setup are not visible here.
164 void ClElementWiseUnaryKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
165 {
168 
    // `collapsed` is declared on the missing listing line 169 (presumably derived from
    // `window` - TODO confirm); iteration starts from its first 3D slice.
170  Window slice = collapsed.first_slice_window_3D();
171 
    // Fetch the tensors from the pack and downcast them to the OpenCL tensor interface.
172  const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
173  auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
174 
175  do
176  {
    // idx restarts at 0 for every slice.
    // NOTE(review): the statements that consume idx (listing lines 178-179) are missing;
    // presumably they bind src and dst as kernel arguments for `slice` - TODO confirm.
177  unsigned int idx = 0;
180  enqueue(queue, *this, slice, lws_hint());
181  }
    // Advance to the next 3D slice; the loop ends when slide_window_slice_3D() returns false.
182  while(collapsed.slide_window_slice_3D(slice));
183 }
184 } // namespace kernels
185 } // namespace opencl
186 } // namespace arm_compute
arm_compute::support::cpp11::to_string
std::string to_string(T &&value)
Convert integer and float values to string.
Definition: StringSupport.h:168
Cast.h
StringSupport.h
arm_compute::Window::Dimension::start
constexpr int start() const
Return the start of the dimension.
Definition: Window.h:97
arm_compute::UniformQuantizationInfo::offset
int32_t offset
Definition: QuantizationInfo.h:64
arm_compute::experimental::dynamic_fusion::vector_size_byte_opencl
constexpr unsigned int vector_size_byte_opencl
Definition: ClTemplateElementwiseBinary.cpp:42
arm_compute::test::validation::src
SimpleTensor< float > src
Definition: DFT.cpp:155
AdjustVecSize.h
ICLTensor.h
arm_compute::CLBuildOptions::options
const StringSet & options() const
Gets the current options list set.
Definition: CLCompileContext.cpp:72
arm_compute::calculate_max_window
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Definition: WindowHelpers.cpp:28
arm_compute::opencl::kernels::ClElementWiseUnaryKernel::run_op
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
Definition: ClElementwiseUnaryKernel.cpp:164
arm_compute::DataType::QASYMM8
@ QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned
arm_compute::opencl::kernels::ClElementWiseUnaryKernel::ClElementWiseUnaryKernel
ClElementWiseUnaryKernel()
Definition: ClElementwiseUnaryKernel.cpp:78
arm_compute::test::validation::dst
auto dst
Definition: DFT.cpp:170
arm_compute::cpu::kernels::validate_arguments
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
Definition: CpuDirectConv2dKernel.cpp:60
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:1004
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Definition: Validate.h:528
arm_compute::Window::DimX
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
arm_compute::Window::collapse_if_possible
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
Definition: Window.inl:68
ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:353
arm_compute::ITensorPack::get_tensor
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:64
arm_compute::UniformQuantizationInfo
Quantization info when assuming per layer quantization.
Definition: QuantizationInfo.h:41
arm_compute::ElementWiseUnary::RSQRT
@ RSQRT
Reciprocal square root.
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:630
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:877
arm_compute::opencl::kernels::ClElementWiseUnaryKernel::configure
void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const ElementWiseUnary &op)
Initialise the kernel's src and dst.
Definition: ClElementwiseUnaryKernel.cpp:83
StringUtils.h
ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
arm_compute::ElementWiseUnary::SIN
@ SIN
Sine.
ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
arm_compute::ElementWiseUnary::ABS
@ ABS
Absolute value.
ARM_COMPUTE_ERROR_ON
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:467
arm_compute::ITensorPack::get_const_tensor
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:54
ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:456
arm_compute::ITensorPack
Tensor packing service.
Definition: ITensorPack.h:39
arm_compute::CLBuildOptions::add_option
void add_option(std::string option)
Adds option to the existing build option list.
Definition: CLCompileContext.cpp:41
arm_compute::CLCompileContext
CLCompileContext class.
Definition: CLCompileContext.h:204
arm_compute::ACL_DST
@ ACL_DST
Definition: Types.h:55
arm_compute::DataType::U8
@ U8
unsigned 8-bit number
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
Definition: CLValidate.h:35
arm_compute::create_kernel
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:404
arm_compute::Status
Status class.
Definition: Error.h:52
arm_compute::DataType::QASYMM8_SIGNED
@ QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed
WindowHelpers.h
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
arm_compute::float_to_string_with_full_precision
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
Definition: StringUtils.cpp:52
arm_compute::Window::slide_window_slice_3D
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
Definition: Window.h:349
arm_compute::ElementWiseUnary::NEG
@ NEG
Negate.
ARM_COMPUTE_UNUSED
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
arm_compute::Window::Dimension
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:79
arm_compute::Window::set
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
arm_compute::ceil_to_multiple
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number larger or equal to value that is a multiple of divisor.
Definition: Math.h:50
arm_compute::Window::first_slice_window_3D
Window first_slice_window_3D() const
First 3D slice of the window.
Definition: Window.h:305
CLValidate.h
arm_compute::IKernel::window
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
arm_compute::get_cl_type_from_data_type
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
Definition: CLHelpers.cpp:40
arm_compute::ICLKernel::add_3D_tensor_argument
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
Definition: ICLKernel.h:222
Utils.h
arm_compute::UniformQuantizationInfo::scale
float scale
Definition: QuantizationInfo.h:63
arm_compute::Window
Describe a multidimensional execution window.
Definition: Window.h:39
arm_compute::ELEMENTWISE
@ ELEMENTWISE
Elementwise CL kernel type.
Definition: CLTypes.h:85
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::ElementWiseUnary::EXP
@ EXP
Exponential.
arm_compute::opencl::kernels::ClElementWiseUnaryKernel::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ElementWiseUnary &op)
Static function to check if given info will lead to a valid configuration.
Definition: ClElementwiseUnaryKernel.cpp:155
arm_compute::DataType::F16
@ F16
16-bit floating-point number
arm_compute::adjust_vec_size
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension,...
Definition: AdjustVecSize.h:38
arm_compute::has_padding_changed
bool has_padding_changed(const std::unordered_map< const ITensorInfo *, PaddingSize > &padding_map)
Check if the previously stored padding info has changed after configuring a kernel.
Definition: Utils.cpp:462
arm_compute::Window::DimZ
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Definition: Window.h:47
arm_compute::DataType::S32
@ S32
signed 32-bit number
arm_compute::ICLKernel::lws_hint
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:371
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
arm_compute::is_data_type_quantized
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Definition: DataTypeUtils.h:324
arm_compute::ACL_SRC
@ ACL_SRC
Definition: Types.h:44
arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:43
arm_compute::DataType::F32
@ F32
32-bit floating-point number
arm_compute::CLBuildOptions
Build options.
Definition: CLCompileContext.h:38
arm_compute::Window::Dimension::end
constexpr int end() const
Return the end of the dimension.
Definition: Window.h:102
num_elems_processed_per_iteration
unsigned int num_elems_processed_per_iteration
Definition: ClIm2ColKernel.cpp:59
arm_compute::Window::x
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:159
ClElementwiseUnaryKernel.h
arm_compute::ElementWiseUnary::ROUND
@ ROUND
Round.
arm_compute::get_padding_info
std::unordered_map< const ITensorInfo *, PaddingSize > get_padding_info(std::initializer_list< const ITensorInfo * > infos)
Stores padding information before configuring a kernel.
Definition: Utils.cpp:447
arm_compute::ElementWiseUnary::LOGICAL_NOT
@ LOGICAL_NOT
Logical Not.
arm_compute::test::validation::reference::slice
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
Definition: SliceOperations.cpp:38
arm_compute::ElementWiseUnary
ElementWiseUnary
Available element wise unary operations.
Definition: Types.h:445
CLHelpers.h
kernel_name
std::string kernel_name
Definition: ClIm2ColKernel.cpp:57
arm_compute::ElementWiseUnary::LOG
@ LOG
Natural Logarithm.
arm_compute::enqueue
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
Definition: ICLKernel.cpp:32