Compute Library
 21.02
CLChannelCombineKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
33 #include "arm_compute/core/Utils.h"
36 
37 #include <set>
38 #include <string>
39 
40 namespace arm_compute
41 {
// File-local helpers: the anonymous namespace keeps the constant below private to this translation unit.
42 namespace
43 {
// Element count handed to every access-window constructor in the configure() overloads of this file.
44 constexpr unsigned int num_elems_processed_per_iteration = 16;
45 } // namespace
46 
// Constructor member-initialiser list: the input planes and both output pointers start out unset
// (nullptr) and the horizontal/vertical sub-sampling factors all start at 1.
48  : _planes{ { nullptr } }, _output(nullptr), _output_multi(nullptr), _x_subsampling{ { 1, 1, 1 } }, _y_subsampling{ { 1, 1, 1 } }
49 {
50 }
51 
// Convenience overload for a single-tensor output: forwards all arguments unchanged to the
// compile-context overload, using the compile context obtained from CLKernelLibrary::get().
52 void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
53 {
54  configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, plane3, output);
55 }
56 
// Configure the kernel to combine separate input planes into a single output tensor.
//
// compile_context : context used to create the OpenCL kernel.
// plane0..plane2  : input planes; must not be null.
// plane3          : optional fourth input plane; it is only stored when the output format is RGBA8888.
// output          : destination tensor; its format selects the kernel ("channel_combine_" + format name).
//
// NOTE(review): `win`, `plane0_access`, `output_access` and `valid_region` are used below but the lines
// that declare them are not present in this listing.
57 void CLChannelCombineKernel::configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
58 {
59  ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output);
64 
69 
73 
74  const Format output_format = output->info()->format();
75 
76  // Check if horizontal dimension of Y plane is even and validate horizontal sub-sampling dimensions for U and V planes
77  if(Format::YUYV422 == output_format || Format::UYVY422 == output_format)
78  {
79  // Validate Y plane of input and output
80  ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(output_format, plane0, output);
81 
82  // Validate U and V plane of the input
83  ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), plane1, plane2);
84  }
85 
    // Remember the inputs; the fourth plane stays null unless the RGBA8888 branch below sets it.
86  _planes[0] = plane0;
87  _planes[1] = plane1;
88  _planes[2] = plane2;
89  _planes[3] = nullptr;
90 
91  // Validate the last input tensor only for RGBA format
92  if(Format::RGBA8888 == output_format)
93  {
96 
99 
100  _planes[3] = plane3;
101  }
102 
    // Single-tensor mode: run() tests _output against nullptr to pick the single-planar argument path.
103  _output = output;
104  _output_multi = nullptr;
105 
106  // Half the processed elements for U and V channels due to horizontal sub-sampling of 2
    // NOTE(review): this is the only place in this overload that writes _x_subsampling; for other
    // formats the previously stored factors are kept. Confirm the kernel is never re-configured
    // after a sub-sampled configuration.
107  if(Format::YUYV422 == output_format || Format::UYVY422 == output_format)
108  {
109  _x_subsampling[1] = 2;
110  _x_subsampling[2] = 2;
111  }
112 
113  // Create kernel
114  std::string kernel_name = "channel_combine_" + string_from_format(output_format);
115  _kernel = create_kernel(compile_context, kernel_name);
116 
117  // Configure window
119 
    // Planes 1 and 2 are accessed with scale factors equal to the reciprocal of their sub-sampling factors.
121  AccessWindowRectangle plane1_access(plane1->info(), 0, 0, num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]);
122  AccessWindowRectangle plane2_access(plane2->info(), 0, 0, num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]);
    // plane3 is optional, so its access pattern is built on a null info when it is absent.
123  AccessWindowHorizontal plane3_access(plane3 == nullptr ? nullptr : plane3->info(), 0, num_elems_processed_per_iteration);
125 
126  update_window_and_padding(win, plane0_access, plane1_access, plane2_access, plane3_access, output_access);
127 
    // The two lines below are the trailing arguments of a statement whose first line is not in this listing.
129  plane1->info()->valid_region(),
130  plane2->info()->valid_region());
    // Narrow the valid region further by plane3 when it was supplied.
131  if(plane3 != nullptr)
132  {
133  valid_region = intersect_valid_regions(plane3->info()->valid_region(), valid_region);
134  }
    // The output valid region takes the computed anchor together with the output's full tensor shape.
135  output_access.set_valid_region(win, ValidRegion(valid_region.anchor, output->info()->tensor_shape()));
136 
137  ICLKernel::configure_internal(win);
138 }
139 
// Convenience overload for a multi-planar output: forwards all arguments unchanged to the
// compile-context overload, using the compile context obtained from CLKernelLibrary::get().
140 void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
141 {
142  configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, output);
143 }
144 
// Configure the kernel to combine three input images into a multi-planar output image.
//
// compile_context : context used to create the OpenCL kernel.
// plane0..plane2  : input images; must not be null.
// output          : destination multi-planar image; its format selects the kernel and build option:
//                   NV12/NV21 -> "channel_combine_NV" with -DNV12 / -DNV21,
//                   otherwise -> "copy_planes_3p"     with -DIYUV / -DYUV444.
//
// NOTE(review): `win` is used below but the line that declares it is not present in this listing.
145 void CLChannelCombineKernel::configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
146 {
147  ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output);
151 
156 
160 
161  const Format output_format = output->info()->format();
162 
163  // Validate shape of Y plane to be even and shape of sub-sampling dimensions for U and V planes
164  // Perform validation only for formats which require sub-sampling.
165  if(Format::YUV444 != output_format)
166  {
167  // Validate Y plane of input and output
168  ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(output_format, plane0, output->plane(0));
169 
170  // Validate U and V plane of the input
171  ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), plane1, plane2);
172 
173  // Validate second plane U (NV12 and NV21 have a UV88 combined plane while IYUV has only the U plane)
174  // MultiImage generates the correct tensor shape but also check in case the tensor shape of planes was changed to a wrong size
175  ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), output->plane(1));
176 
177  // Validate the last plane V of format IYUV
178  if(Format::IYUV == output_format)
179  {
180  // Validate V plane of the output
181  ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), output->plane(2));
182  }
183  }
184 
185  // Set input tensors
186  _planes[0] = plane0;
187  _planes[1] = plane1;
188  _planes[2] = plane2;
189  _planes[3] = nullptr;
190 
191  // Set output tensor
    // Multi-planar mode: _output is cleared so run() takes the multi-planar argument path.
192  _output = nullptr;
193  _output_multi = output;
194 
    // True for NV12/NV21, where the output has only two planes (see the null info passed for plane 2 below).
195  bool has_two_planars = false;
196 
197  // Set sub-sampling parameters for each plane
198  std::string kernel_name;
199  std::set<std::string> build_opts;
200 
201  if(Format::NV12 == output_format || Format::NV21 == output_format)
202  {
203  _x_subsampling = { { 1, 2, 2 } };
204  _y_subsampling = { { 1, 2, 2 } };
205  kernel_name = "channel_combine_NV";
206  build_opts.emplace(Format::NV12 == output_format ? "-DNV12" : "-DNV21");
207  has_two_planars = true;
208  }
209  else
210  {
    // Only IYUV overwrites the sub-sampling factors in this branch.
    // NOTE(review): for any other format the previously stored factors are kept — confirm the
    // kernel is never re-configured after a sub-sampled configuration.
211  if(Format::IYUV == output_format)
212  {
213  _x_subsampling = { { 1, 2, 2 } };
214  _y_subsampling = { { 1, 2, 2 } };
215  }
216 
217  kernel_name = "copy_planes_3p";
218  build_opts.emplace(Format::IYUV == output_format ? "-DIYUV" : "-DYUV444");
219  }
220 
221  // Create kernel
222  _kernel = create_kernel(compile_context, kernel_name, build_opts);
223 
224  // Configure window
226 
    // Planes 1 and 2 (input and output) are scaled by the reciprocal of their sub-sampling factors.
    // Output plane 0 uses the plane-1 vertical factor, matching the slice reduction performed in run().
227  AccessWindowRectangle input_plane0_access(plane0->info(), 0, 0, num_elems_processed_per_iteration, 1.f);
228  AccessWindowRectangle input_plane1_access(plane1->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]);
229  AccessWindowRectangle input_plane2_access(plane2->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]);
230  AccessWindowRectangle output_plane0_access(output->plane(0)->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f, 1.f / _y_subsampling[1]);
231  AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]);
    // Two-plane formats have no third output plane, so its access pattern is built on a null info.
232  AccessWindowRectangle output_plane2_access(has_two_planars ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]);
233 
    // The two lines below are the trailing arguments of a statement whose first line is not in this listing.
235  input_plane0_access, input_plane1_access, input_plane2_access,
236  output_plane0_access, output_plane1_access, output_plane2_access);
237 
238  ValidRegion plane0_valid_region = plane0->info()->valid_region();
    // For two-plane outputs plane 1 depends on both input planes 1 and 2, so their valid regions are intersected.
    // NOTE(review): for three-plane outputs the plane-1 anchor is taken from input plane2 — confirm this is intended.
239  ValidRegion output_plane1_region = has_two_planars ? intersect_valid_regions(plane1->info()->valid_region(), plane2->info()->valid_region()) : plane2->info()->valid_region();
240  output_plane0_access.set_valid_region(win, ValidRegion(plane0_valid_region.anchor, output->plane(0)->info()->tensor_shape()));
241  output_plane1_access.set_valid_region(win, ValidRegion(output_plane1_region.anchor, output->plane(1)->info()->tensor_shape()));
    // NOTE(review): output->plane(2) is dereferenced here for two-plane formats as well, although
    // output_plane2_access was built on a null info in that case — confirm plane(2) is always a valid object.
242  output_plane2_access.set_valid_region(win, ValidRegion(plane2->info()->valid_region().anchor, output->plane(2)->info()->tensor_shape()));
243 
244  ICLKernel::configure_internal(win);
245 }
246 
// Enqueue the configured kernel for every 2D slice of the given window.
//
// window : execution window to iterate over, one 2D slice at a time.
// queue  : OpenCL command queue the kernel is enqueued on.
//
// NOTE(review): `slice` is used below but the line that declares it is not present in this listing.
247 void CLChannelCombineKernel::run(const Window &window, cl::CommandQueue &queue)
248 {
251 
254 
255  do
256  {
257  // Subsampling in plane 1
    // X start/end/step and Y start/end are divided by the plane's sub-sampling factors; the Y step is forced to 1.
258  Window win_sub_plane1(slice);
259  win_sub_plane1.set(Window::DimX, Window::Dimension(win_sub_plane1.x().start() / _x_subsampling[1], win_sub_plane1.x().end() / _x_subsampling[1], win_sub_plane1.x().step() / _x_subsampling[1]));
260  win_sub_plane1.set(Window::DimY, Window::Dimension(win_sub_plane1.y().start() / _y_subsampling[1], win_sub_plane1.y().end() / _y_subsampling[1], 1));
261 
262  // Subsampling in plane 2
263  Window win_sub_plane2(slice);
264  win_sub_plane2.set(Window::DimX, Window::Dimension(win_sub_plane2.x().start() / _x_subsampling[2], win_sub_plane2.x().end() / _x_subsampling[2], win_sub_plane2.x().step() / _x_subsampling[2]));
265  win_sub_plane2.set(Window::DimY, Window::Dimension(win_sub_plane2.y().start() / _y_subsampling[2], win_sub_plane2.y().end() / _y_subsampling[2], 1));
266 
    // Running kernel-argument index; each add_2D_tensor_argument call takes it by reference.
267  unsigned int idx = 0;
268 
269  // Set inputs
    // The fourth plane is only passed when configure() stored one.
270  add_2D_tensor_argument(idx, _planes[0], slice);
271  add_2D_tensor_argument(idx, _planes[1], win_sub_plane1);
272  add_2D_tensor_argument(idx, _planes[2], win_sub_plane2);
273  add_2D_tensor_argument_if((nullptr != _planes[3]), idx, _planes[3], slice);
274 
275  // Set outputs
276  if(nullptr != _output) // Single planar output
277  {
278  add_2D_tensor_argument(idx, _output, slice);
279  }
280  else // Multi-planar output
281  {
282  // Reduce slice in case of subsampling to avoid out-of bounds access
    // The reduced slice is also the one enqueued below and the one advanced by slide_window_slice_2D().
283  slice.set(Window::DimY, Window::Dimension(slice.y().start() / _y_subsampling[1], slice.y().end() / _y_subsampling[1], 1));
284 
285  add_2D_tensor_argument(idx, _output_multi->cl_plane(0), slice);
286  add_2D_tensor_argument(idx, _output_multi->cl_plane(1), win_sub_plane1);
    // The third output plane is only passed for formats that have three planes.
287  add_2D_tensor_argument_if((3 == num_planes_from_format(_output_multi->info()->format())), idx, _output_multi->cl_plane(2), win_sub_plane2);
288 
    // Extra trailing kernel argument: the end of the reduced Y range.
289  _kernel.setArg(idx++, slice.y().end());
290  }
291 
292  enqueue(queue, *this, slice, lws_hint());
293  }
    // Repeat for each further 2D slice of the window.
294  while(window.slide_window_slice_2D(slice));
295 }
296 } // namespace arm_compute
Window first_slice_window_2D() const
First 2D slice of the window.
Definition: Window.h:283
A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
void add_2D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx ...
Definition: ICLKernel.h:159
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
Definition: IGCKernel.cpp:41
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t)
Definition: Validate.h:856
size_t num_planes_from_format(Format format)
Return the number of planes for a given format.
Definition: Utils.h:451
constexpr int step() const
Return the step of the dimension.
Definition: Window.h:104
cl::NDRange lws_hint() const
Return the Local-Workgroup-Size hint.
Definition: ICLKernel.h:276
1 channel, 1 U8 per channel
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Interface for OpenCL multi-planar images.
Definition: ICLMultiImage.h:36
A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling.
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77
virtual ICLImage * cl_plane(unsigned int index)=0
Return a pointer to the requested OpenCL plane of the image.
#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t,...)
Definition: Validate.h:643
const ValidRegion valid_region
Definition: Scale.cpp:221
A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling.
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Definition: Window.h:323
Copyright (c) 2017-2021 Arm Limited.
virtual ValidRegion valid_region() const =0
Valid region of the tensor.
3 channels, 1 U8 per channel
#define ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(...)
Definition: Validate.h:318
Implementation of a rectangular access pattern.
virtual Format format() const =0
Colour format of the image.
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set< std::string > &build_opts=std::set< std::string >())
Creates an opencl kernel using a compile context.
Definition: CLHelpers.cpp:403
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Format
Image colour formats.
Definition: Types.h:54
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
Implementation of a row access pattern.
std::string kernel_name
A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
ValidRegion intersect_valid_regions(const Ts &... regions)
Intersect multiple valid regions.
Definition: WindowHelpers.h:74
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49
IImage * plane(unsigned int index) override
Return a pointer to the requested plane of the image.
void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
Configure function's inputs and outputs.
virtual const MultiImageInfo * info() const =0
Interface to be implemented by the child class to return the multi-planar image's metadata...
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:790
CLCompileContext class.
A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes.
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
4 channels, 1 U8 per channel
void set_dimension_step(size_t dimension, int step)
Set the step of a given dimension.
Definition: Window.inl:167
void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: ICLKernel.h:148
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
constexpr const Dimension & y() const
Alias to access the second dimension of the window.
Definition: Window.h:154
Wrapper to configure the Khronos OpenCL C++ header.
unsigned int num_elems_processed_per_iteration
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Container for valid region of a window.
Definition: Types.h:188
constexpr int end() const
Return the end of the dimension.
Definition: Window.h:99
A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes.
Format format() const
Colour format of the image.
constexpr int start() const
Return the start of the dimension.
Definition: Window.h:94
void run(const Window &window, cl::CommandQueue &queue) override
Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue...
Describe a multidimensional execution window.
Definition: Window.h:39
Coordinates anchor
Anchor for the start of the valid region.
Definition: Types.h:260
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
#define ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...)
Definition: Validate.h:351
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
Definition: Window.h:145
const std::string & string_from_format(Format format)
Convert a tensor format into a string.
Definition: Utils.cpp:76