43 Status
validate_arguments(
const ITensorInfo *input1,
const ITensorInfo *input2,
const ITensorInfo *output,
float scale,
69 if(output->total_size() > 0)
77 "Output can only be U8 if both inputs are U8");
79 "Output can only be QASYMM8 if both inputs are QASYMM8");
81 "Output can only be QASYMM8_SIGNED if both inputs are QASYMM8_SIGNED");
83 "Output can only be QSYMM16 if both inputs are QSYMM16");
85 "Output can only be S32 if both inputs are QSYMM16");
92 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
95 const TensorShape &out_shape = broadcast_pair.first;
125 Window win_input1 = win.broadcast_if_dimension_le_one(*input1);
126 Window win_input2 = win.broadcast_if_dimension_le_one(*input2);
128 AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration);
129 AccessWindowHorizontal input2_access(input2, 0, num_elems_processed_per_iteration);
130 AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
136 output_access.set_valid_region(win, valid_region);
139 return std::make_pair(err, win);
144 : _input1(nullptr), _input2(nullptr), _output(nullptr)
159 scale, overflow_policy, rounding_policy, act_info));
162 auto win_config = validate_and_configure_window(input1, input2, output);
172 float normalized_mantissa = std::frexp(scale, &exponent);
176 if((normalized_mantissa == 0.5f) && (-14 <= exponent) && (exponent <= 1))
180 scale_int = std::abs(exponent - 1);
183 std::string acc_type;
228 kernel_name +=
"_quantized";
232 kernel_name += (scale_int >= 0) ?
"_int" :
"_float";
235 build_opts.
add_option(
"-DACC_DATA_TYPE=" + acc_type);
250 if(scale_int >= 0 && !is_quantized)
252 _kernel.setArg(idx++, scale_int);
256 _kernel.setArg(idx++, scale);
259 ICLKernel::configure_internal(win_config.second);
281 const TensorShape &in_shape1 = src_0->info()->tensor_shape();
282 const TensorShape &in_shape2 = src_1->info()->tensor_shape();
283 const TensorShape &out_shape = dst->info()->tensor_shape();
285 bool can_collapse =
true;
286 if(std::min(in_shape1.total_size(), in_shape2.
total_size()) > 1)
291 can_collapse = (in_shape1[d] == in_shape2[d]);
295 bool has_collapsed =
false;
307 unsigned int idx = 0;
322 const unsigned int border = std::min<unsigned int>(num_elems_processed_per_iteration - 1
U, replicateSize);
328 constexpr
unsigned int num_elems_processed_per_iteration_complex = 1;
355 const TensorShape &out_shape = broadcast_pair.first;
356 const ValidRegion &valid_region = broadcast_pair.second;
377 return std::make_pair(err, win);
382 : _input1(nullptr), _input2(nullptr), _output(nullptr)
397 auto win_config = validate_and_configure_window_complex(input1, input2, output);
416 ICLKernel::configure_internal(win_config.second);
437 const TensorShape &in_shape1 = src_0->info()->tensor_shape();
438 const TensorShape &in_shape2 = src_1->info()->tensor_shape();
439 const TensorShape &out_shape = dst->info()->tensor_shape();
441 bool can_collapse =
true;
442 if(std::min(in_shape1.total_size(), in_shape2.
total_size()) > 1)
447 can_collapse = (in_shape1[d] == in_shape2[d]);
451 bool has_collapsed =
false;
463 unsigned int idx = 0;
478 const unsigned int border = std::min<unsigned int>(num_elems_processed_per_iteration_complex - 1
U, replicateSize);
bool set_format_if_unknown(ITensorInfo &info, Format format)
Set the format, data type and number of channels to the specified value if the current data type is unknown.
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor)
const Window & window() const
The maximum window the kernel can be executed on.
quantized, symmetric fixed-point 16-bit number
bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type)
Set the data type and number of channels to the specified value if the current data type is unknown...
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
bool enabled() const
Check if initialised.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
Container for 2D border size.
const StringSet & options() const
Gets the current options list set.
void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's input, output and border mode.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
TensorShape collapsed_from(size_t start) const
Return a copy with collapsed dimensions starting from a given point.
1 channel, 1 U8 per channel
float a() const
Get the alpha value.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
std::string to_string(T &&value)
Convert integer and float values to string.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
static TensorShape broadcast_shape(const Shapes &... shapes)
If shapes are broadcast compatible, return the broadcasted shape.
const std::string & string_from_activation_func(ActivationLayerInfo::ActivationFunction act)
Translates a given activation function to a string.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
std::string lower_string(const std::string &val)
Lower a given string.
const ValidRegion valid_region
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
static std::pair< TensorShape, ValidRegion > broadcast_shape_and_valid_region(const Infos &... infos)
If infos are broadcast compatible tensor info's, return the broadcasted shape and the intersection of...
Activation Layer Information class.
void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx...
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
1 channel, 1 S32 per channel
void add_option(std::string option)
Adds option to the existing build option list.
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of CLPixelWiseMultiplicationKernel.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
CLComplexPixelWiseMultiplicationKernel()
Default constructor.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
static constexpr unsigned int num_arguments_per_3D_tensor()
Returns the number of arguments enqueued per 3D tensor object.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed=nullptr) const
Collapse the dimensions between first and last if possible.
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
BorderSize border_size() const override
The size of the border for that kernel.
Implementation of a row access pattern.
size_t total_size() const
Collapses all dimensions to a single linear total size.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
std::string get_cl_type_from_data_type(const DataType &dt)
Translates a tensor data type to the appropriate OpenCL type.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
RoundingPolicy
Rounding method.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
Set the shape to the specified value if the current assignment is empty.
bool have_different_dimensions(const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim)
void add_option_if(bool cond, std::string option)
Adds option if a given condition is true.
virtual size_t element_size() const =0
Element size in bytes calculated as data_size() * num_channels()
Window broadcast_if_dimension_le_one(const TensorShape &shape) const
Don't advance in the dimension where shape is less equal to 1.
bool slide_window_slice_3D(Window &slice) const
Slide the passed 3D window slice.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
1 channel, 1 S16 per channel
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
BorderSize border_size() const override
The size of the border for that kernel.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
static constexpr size_t DimZ
Alias for dimension 2 also known as Z dimension.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
unsigned int num_dimensions() const
Returns the effective dimensionality of the tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
Wrapper to configure the Khronos OpenCL C++ header.
unsigned int num_elems_processed_per_iteration
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
CLPixelWiseMultiplicationKernel()
Default constructor.
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of CLComplexPixelWiseMultiplicationKernel.
ActivationFunction activation() const
Get the type of activation function.
float b() const
Get the beta value.
quantized, asymmetric fixed-point 8-bit number signed
Container for valid region of a window.
void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info=ActivationLayerInfo())
Initialise the kernel's input, output and border mode.
Window first_slice_window_3D() const
First 3D slice of the window.
Truncates the least significant values that are lost in operations.
Describe a multidimensional execution window.
ConvertPolicy
Policy to handle overflow.
virtual size_t num_channels() const =0
The number of channels for each tensor element.
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
void set_valid_region(const Window &window, const ValidRegion &input_valid_region, bool border_undefined=false, const BorderSize &border_size=BorderSize(0))
Set the valid region based on access pattern, valid region of the inputs and border mode...
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
void add_option_if_else(bool cond, std::string option_true, std::string option_false)
Adds first option if condition is true else the second one.