Compute Library
 21.02
CLMeanStdDev.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
31 
32 using namespace arm_compute;
33 
34 CLMeanStdDev::CLMeanStdDev(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
35  : _memory_group(std::move(memory_manager)),
36  _data_type(),
37  _num_pixels(),
38  _run_stddev(),
39  _reduction_operation_mean(),
40  _reduction_operation_stddev(),
41  _reduction_output_mean(),
42  _reduction_output_stddev(),
43  _mean(nullptr),
44  _stddev(nullptr),
45  _mean_stddev_kernel(std::make_unique<CLMeanStdDevKernel>()),
46  _fill_border_kernel(std::make_unique<CLFillBorderKernel>()),
47  _global_sum(),
48  _global_sum_squared()
49 {
50 }
51 
52 CLMeanStdDev::~CLMeanStdDev() = default;
53 
54 Status CLMeanStdDev::validate(ITensorInfo *input, float *mean, float *stddev)
55 {
57  if(is_data_type_float(input->data_type()))
58  {
59  ARM_COMPUTE_UNUSED(mean);
60  ARM_COMPUTE_UNUSED(stddev);
61 
63  TensorInfo output_shape_info = TensorInfo(output_shape, 1, DataType::U8);
64  return CLReductionOperation::validate(input, &output_shape_info, 0, ReductionOperation::SUM);
65  }
66  else
67  {
68  return CLMeanStdDevKernel::validate(input, mean, nullptr, stddev, nullptr);
69  }
70 }
71 
72 void CLMeanStdDev::configure(ICLImage *input, float *mean, float *stddev)
73 {
74  configure(CLKernelLibrary::get().get_compile_context(), input, mean, stddev);
75 }
76 
77 void CLMeanStdDev::configure(const CLCompileContext &compile_context, ICLImage *input, float *mean, float *stddev)
78 {
79  // In the case of F16/F32 we call reduction operation for calculating CLMeanStdDev
80  _data_type = input->info()->data_type();
81 
82  if(is_data_type_float(_data_type))
83  {
84  _num_pixels = input->info()->dimension(0) * input->info()->dimension(1);
85 
86  _memory_group.manage(&_reduction_output_mean);
87  _reduction_operation_mean.configure(compile_context, input, &_reduction_output_mean, 0, ReductionOperation::SUM);
88  _reduction_output_mean.allocator()->allocate();
89  _mean = mean;
90 
91  if(stddev != nullptr)
92  {
93  _memory_group.manage(&_reduction_output_stddev);
94  _reduction_operation_stddev.configure(compile_context, input, &_reduction_output_stddev, 0, ReductionOperation::SUM_SQUARE);
95  _reduction_output_stddev.allocator()->allocate();
96  _stddev = stddev;
97  _run_stddev = true;
98  }
99  }
100  else
101  {
102  _global_sum = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
103 
104  if(stddev != nullptr)
105  {
106  _global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
107  }
108 
109  _mean_stddev_kernel->configure(compile_context, input, mean, &_global_sum, stddev, &_global_sum_squared);
110  _fill_border_kernel->configure(compile_context, input, _mean_stddev_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
111  }
112 }
113 
114 template <typename T>
115 void CLMeanStdDev::run_float()
116 {
117  MemoryGroupResourceScope scope_mg(_memory_group);
118 
119  // Perform reduction on x-axis
120  _reduction_operation_mean.run();
121  if(_run_stddev)
122  {
123  _reduction_operation_stddev.run();
124  _reduction_output_stddev.map(true);
125  }
126 
127  _reduction_output_mean.map(true);
128 
129  auto mean = static_cast<T>(0);
130 
131  // Calculate final result for mean
132  for(unsigned int i = 0; i < _reduction_output_mean.info()->dimension(1); ++i)
133  {
134  mean += *reinterpret_cast<T *>(_reduction_output_mean.buffer() + _reduction_output_mean.info()->offset_element_in_bytes(Coordinates(0, i)));
135  }
136 
137  mean /= _num_pixels;
138  *_mean = mean;
139 
140  if(_run_stddev)
141  {
142  auto stddev = static_cast<T>(0);
143  // Calculate final result for stddev
144  for(unsigned int i = 0; i < _reduction_output_stddev.info()->dimension(1); ++i)
145  {
146  stddev += *reinterpret_cast<T *>(_reduction_output_stddev.buffer() + _reduction_output_stddev.info()->offset_element_in_bytes(Coordinates(0, i)));
147  }
148  *_stddev = std::sqrt((stddev / _num_pixels) - (mean * mean));
149 
150  _reduction_output_stddev.unmap();
151  }
152  _reduction_output_mean.unmap();
153 }
154 
155 void CLMeanStdDev::run_int()
156 {
157  CLScheduler::get().enqueue(*_fill_border_kernel);
158  CLScheduler::get().enqueue(*_mean_stddev_kernel);
159 }
160 
162 {
163  switch(_data_type)
164  {
165  case DataType::F16:
166  run_float<half>();
167  break;
168  case DataType::F32:
169  run_float<float>();
170  break;
171  case DataType::U8:
172  run_int();
173  break;
174  default:
175  ARM_COMPUTE_ERROR_ON("Not supported");
176  }
177 }
Class describing the value of a pixel for any image format.
Definition: PixelValue.h:34
Shape of a tensor.
Definition: TensorShape.h:39
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
Definition: CLTensor.cpp:41
static Status validate(ITensorInfo *input, float *mean, float *stddev=nullptr)
Static function to check if given info will lead to a valid configuration of CLMeanStdDev.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
static CLScheduler & get()
Access the scheduler singleton.
1 channel, 1 U8 per channel
size_t dimension(size_t index) const override
Return the size of the requested dimension.
Definition: TensorInfo.h:233
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Definition: CLTensor.cpp:61
Status class.
Definition: Error.h:52
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
void map(bool blocking=true)
Enqueue a map operation of the allocated buffer.
Definition: CLTensor.cpp:66
uint8_t * buffer() const override
Interface to be implemented by the child class to return a pointer to CPU memory. ...
Definition: ICLTensor.cpp:53
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Definition: MemoryGroup.h:79
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
CLMeanStdDev(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default Constructor.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev=nullptr, cl::Buffer *global_sum_squared=nullptr)
Static function to check if given info will lead to a valid configuration of CLMeanStdDevKernel.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(t)
Definition: Validate.h:858
Interface for the kernel to calculate mean and standard deviation of input image pixels.
Coordinates of an item.
Definition: Coordinates.h:37
void run() override
Run the kernels contained in the function.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Static function to check if given info will lead to a valid configuration of CLReductionOperation.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
void configure(ICLImage *input, float *mean, float *stddev=nullptr)
Initialise the kernel's inputs and outputs.
CLCompileContext class.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
Interface for filling the border of a kernel.
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
int32_t offset_element_in_bytes(const Coordinates &pos) const override
The offset in bytes from the beginning of the memory allocation to access the element at position (x...
Definition: TensorInfo.cpp:402
Store the tensor's metadata.
Definition: TensorInfo.h:45
void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims=true)
Set the input and output tensors.
~CLMeanStdDev()
Default destructor.
void unmap()
Enqueue an unmap operation of the allocated and mapped buffer.
Definition: CLTensor.cpp:71
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
Definition: Utils.h:1148