44 constexpr
unsigned int vector_size_byte_opencl = 16;
46 std::map<ArithmeticOperation, std::string> supported_arithmetic_ops =
58 std::map<ArithmeticOperation, std::string> supported_sat_arithmetic_ops =
64 std::string generate_id_for_tuning_common(
const std::string &
kernel_name,
const ITensorInfo &src1,
const ITensorInfo &
dst)
66 std::string config_id;
78 Status validate_in_place_output_shape(
const bool in_place,
const bool src1_in_place,
const ITensorInfo &src1,
const ITensorInfo &src2,
const ITensorInfo &dst,
const TensorShape &out_shape)
83 "Wrong shape for dst, cannot do in_place calculation");
88 "Wrong shape for dst");
93 Status validate_arguments_with_float_only_supported_rules(
const ITensorInfo &src1,
const ITensorInfo &src2,
const ITensorInfo &dst)
101 const bool in_place = (&src1 == &
dst) || (&src2 == &dst);
102 const bool src1_in_place = in_place && (&src1 == &
dst);
109 if(dst.total_size() > 0)
119 Status validate_arguments_divide_operation(
const ITensorInfo *src1,
const ITensorInfo *src2,
const ITensorInfo *dst)
127 const bool in_place = (src1 ==
dst) || (src2 == dst);
128 const bool src1_in_place = in_place && (src1 ==
dst);
135 if(dst->total_size() > 0)
145 Status validate_arguments_with_arithmetic_rules(
const ITensorInfo &src1,
const ITensorInfo &src2,
const ITensorInfo &dst)
155 const int32_t in1_offset = src1.quantization_info().uniform().offset;
156 const int32_t in2_offset = src2.quantization_info().uniform().offset;
162 const bool in_place = (&src1 == &
dst) || (&src2 == &dst);
163 const bool src1_in_place = in_place && (&src1 == &
dst);
169 if(dst.total_size() > 0)
177 const int32_t
offset = dst.quantization_info().uniform().offset;
184 CLBuildOptions generate_build_options_with_arithmetic_rules(
const ITensorInfo &src1,
const ITensorInfo &src2,
const ITensorInfo &dst,
const std::string &operation_string)
186 CLBuildOptions build_opts;
195 build_opts.add_option(
"-DOP=" + operation_string);
198 const UniformQuantizationInfo iq1info = src1.quantization_info().uniform();
199 const UniformQuantizationInfo iq2info = src2.quantization_info().uniform();
200 const UniformQuantizationInfo oqinfo = dst.quantization_info().uniform();
209 build_opts.add_option_if(src1.data_type() ==
DataType::S32,
"-DS32");
212 const bool in_place = (&src1 == &
dst) || (&src2 == &dst);
213 const bool src1_in_place = in_place && (&src1 == &
dst);
214 build_opts.add_option_if(in_place,
"-DIN_PLACE");
215 build_opts.add_option_if(src1_in_place,
"-DSRC1_IN_PLACE");
220 std::pair<Status, Window> configure_window_arithmetic_common(ITensorInfo &dst)
222 const unsigned int num_elems_processed_per_iteration =
adjust_vec_size(vector_size_byte_opencl / dst.element_size(), dst.dimension(0));
224 return std::make_pair(Status{}, win);
227 std::pair<Status, Window> validate_and_configure_window_for_arithmetic_operators(ITensorInfo &src1, ITensorInfo &src2, ITensorInfo &dst)
230 const TensorShape &out_shape = broadcast_pair.first;
234 return configure_window_arithmetic_common(dst);
237 std::pair<Status, Window> validate_and_configure_window_for_logical_binary_operators(ITensorInfo &src1, ITensorInfo &src2, ITensorInfo &dst)
240 const TensorShape &out_shape = broadcast_pair.first;
245 return configure_window_arithmetic_common(dst);
248 std::pair<Status, Window> validate_and_configure_window_for_division(ITensorInfo &src1, ITensorInfo &src2, ITensorInfo &dst)
251 const TensorShape &out_shape = broadcast_pair.first;
255 return configure_window_arithmetic_common(dst);
267 auto win_config = validate_and_configure_window(*src1, *src2, *dst);
270 std::string kernel_name =
"elementwise_operation_" + name();
273 kernel_name +=
"_quantized";
277 CLBuildOptions build_opts = generate_build_options(*src1, *src2, *dst);
288 ICLKernel::configure_internal(win_config.second);
290 _config_id = generate_id_for_tuning(kernel_name, *src1, *dst);
304 const TensorShape &in_shape1 = src_0->info()->tensor_shape();
305 const TensorShape &in_shape2 = src_1->info()->tensor_shape();
306 const TensorShape &out_shape = dst->info()->tensor_shape();
308 bool can_collapse =
true;
315 can_collapse = (in_shape1[d] == in_shape2[d]);
319 bool has_collapsed =
false;
330 const bool in_place = (src_0 ==
dst) || (src_1 == dst);
333 unsigned int idx = 0;
355 configure_common(compile_context, src1, src2, dst);
373 std::string ClLogicalBinaryKernel::name()
391 return validate_and_configure_window_for_logical_binary_operators(src1, src2, dst);
397 return generate_build_options_with_arithmetic_rules(src1, src2, dst, name());
400 std::string ClLogicalBinaryKernel::generate_id_for_tuning(
const std::string &kernel_name,
const ITensorInfo &src1,
const ITensorInfo &dst)
402 return generate_id_for_tuning_common(kernel_name, src1, dst);
416 _act_info = act_info;
417 configure_common(compile_context, input1, input2, output);
435 return validate_and_configure_window_for_arithmetic_operators(input1, input2, output);
441 auto build_options = generate_build_options_with_arithmetic_rules(input1, input2, output, name());
442 build_options.add_option((_policy ==
ConvertPolicy::WRAP || has_float_out) ?
"-DWRAP" :
"-DSATURATE");
446 std::string ClSaturatedArithmeticKernel::generate_id_for_tuning(
const std::string &kernel_name,
const ITensorInfo &input1,
const ITensorInfo &output)
448 auto config_id = generate_id_for_tuning_common(kernel_name, input1, output);
454 std::string ClSaturatedArithmeticKernel::name()
456 return supported_sat_arithmetic_ops[_op];
468 _act_info = act_info;
469 configure_common(compile_context, src1, src2, dst);
502 return validate_and_configure_window_for_division(src1, src2, dst);
506 return validate_and_configure_window_for_arithmetic_operators(src1, src2, dst);
512 return generate_build_options_with_arithmetic_rules(src1, src2, dst, name());
514 std::string ClArithmeticKernel::generate_id_for_tuning(
const std::string &kernel_name,
const ITensorInfo &src1,
const ITensorInfo &dst)
516 return generate_id_for_tuning_common(kernel_name, src1, dst);
519 std::string ClArithmeticKernel::name()
521 return supported_arithmetic_ops[_op];
static Status validate(LogicalOperation op, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration.
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
static Status validate(ArithmeticOperation op, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration.
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of a Image.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_ASSERT(cond)
ArithmeticOperation
Available element-wise operations.
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
const Window & window() const
The maximum window the kernel can be executed on.
quantized, symmetric fixed-point 16-bit number
bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type)
Set the data type and number of channels to the specified value if the current data type is unknown...
bool enabled() const
Check if initialised.
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint=CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items=false)
Add the kernel to the command queue with the given window.
const StringSet & options() const
Gets the current options list set.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
TensorShape collapsed_from(size_t start) const
Return a copy with collapsed dimensions starting from a given point.
1 channel, 1 U8 per channel
float a() const
Get the alpha value.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
static TensorShape broadcast_shape(const Shapes &... shapes)
If shapes are broadcast compatible, return the broadcasted shape.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
const std::string & string_from_activation_func(ActivationLayerInfo::ActivationFunction act)
Translates a given activation function to a string.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
std::string lower_string(const std::string &val)
Lower a given string.
bool is_data_type_quantized_symmetric(DataType dt)
Check if a given data type is of symmetric quantized type.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
static std::pair< TensorShape, ValidRegion > broadcast_shape_and_valid_region(const Infos &... infos)
If infos are broadcast compatible tensor info's, return the broadcasted shape and the intersection of...
Activation Layer Information class.
std::set< std::string > build_options
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx...
void configure(const ClCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of ClSaturatedArithmeticKer...
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
1 channel, 1 S32 per channel
void add_option(std::string option)
Adds option to the existing build option list.
const std::string & config_id() const
Get the configuration ID.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override
quantized, asymmetric fixed-point 8-bit number unsigned
static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration.
size_t total_size() const
Collapses all dimensions to a single linear total size.
unsigned int num_elems_processed_per_iteration
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
Set the shape to the specified value if the current assignment is empty.
bool have_different_dimensions(const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim)
Window broadcast_if_dimension_le_one(const TensorShape &shape) const
Don't advance in the dimension where shape is less equal to 1.
Elementwise CL kernel type.
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
y*x if x < 0, x otherwise
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
1 channel, 1 S16 per channel
bool has_padding_changed(const std::unordered_map< const ITensorInfo *, PaddingSize > &padding_map)
Check if the previously stored padding info has changed after configuring a kernel.
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
const std::string & string_from_data_layout(DataLayout dl)
Convert a data layout identity into a string.
void configure(const ClCompileContext &compile_context, LogicalOperation op, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst)
Function to configure kernel.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
unsigned int num_dimensions() const
Returns the effective dimensionality of the tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
std::unordered_map< const ITensorInfo *, PaddingSize > get_padding_info(std::initializer_list< const ITensorInfo *> infos)
Stores padding information before configuring a kernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size.
ActivationFunction activation() const
Get the type of activation function.
float b() const
Get the beta value.
quantized, asymmetric fixed-point 8-bit number signed
void configure(const ClCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of ClArithmeticKernel.
Window first_slice_window_3D() const
First 3D slice of the window.
LogicalOperation
List of supported logical operations.
Describe a multidimensional execution window.
ConvertPolicy
Policy to handle overflow.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.