42 : _input(nullptr), _mean(nullptr), _stddev(nullptr), _global_sum(nullptr), _global_sum_squared(nullptr), _border_size(0)
78 _global_sum = global_sum;
79 _global_sum_squared = global_sum_squared;
82 std::set<std::string> build_opts;
84 if(_stddev !=
nullptr)
86 build_opts.insert(
"-DSTDDEV");
89 _kernel =
create_kernel(compile_context,
"mean_stddev_accumulate", build_opts);
94 _kernel.setArg(idx++, static_cast<cl_uint>(input->
info()->
dimension(1)));
95 _kernel.setArg(idx++, *_global_sum);
97 if(_stddev !=
nullptr)
99 _kernel.setArg(idx++, *_global_sum_squared);
103 constexpr
unsigned int num_elems_processed_per_iteration_x = 8;
104 const unsigned int num_elems_processed_per_iteration_y = input->
info()->
dimension(1);
109 AccessWindowRectangle input_access(input->
info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
112 ICLKernel::configure_internal(win);
121 static const cl_ulong zero = 0;
122 queue.enqueueWriteBuffer(*_global_sum, CL_FALSE, 0,
sizeof(cl_ulong), &zero);
124 if(_stddev !=
nullptr)
126 queue.enqueueWriteBuffer(*_global_sum_squared, CL_FALSE, 0,
sizeof(cl_ulong), &zero);
133 unsigned int idx = 0;
143 cl_ulong global_sum = 0;
144 cl_ulong global_sum_squared = 0;
147 queue.enqueueReadBuffer(*_global_sum, CL_TRUE, 0,
sizeof(cl_ulong), static_cast<void *>(&global_sum));
148 const float mean = global_sum / num_pixels;
151 if(_stddev !=
nullptr)
153 queue.enqueueReadBuffer(*_global_sum_squared, CL_TRUE, 0,
sizeof(cl_ulong), static_cast<void *>(&global_sum_squared));
154 *_stddev = std::sqrt((global_sum_squared / num_pixels) - (mean * mean));
Window first_slice_window_2D() const
First 2D slice of the window.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
Container for 2D border size.
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
1 channel, 1 U8 per channel
void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev=nullptr, cl::Buffer *global_sum_squared=nullptr)
Initialise the kernel's input and outputs.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true, an error message is printed and an exception is thrown.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Copyright (c) 2017-2021 Arm Limited.
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
Implementation of a rectangular access pattern.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an OpenCL kernel using a compile context.
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev=nullptr, cl::Buffer *global_sum_squared=nullptr)
Static function to check if given info will lead to a valid configuration of CLMeanStdDevKernel.
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number greater than or equal to value that is a multiple of divisor.
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(t)
Class to describe a number of elements in each dimension.
#define ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED()
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
static constexpr unsigned int num_arguments_per_2D_tensor()
Returns the number of arguments enqueued per 2D tensor object.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
void set_dimension_step(size_t dimension, int step)
Set the step of a given dimension.
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx...
Interface for OpenCL tensor.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Wrapper to configure the Khronos OpenCL C++ header.
BorderSize border_size() const override
The size of the border for that kernel.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
CLMeanStdDevKernel()
Default constructor.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)