Compute Library
 22.08
NESelectKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
26 #include "arm_compute/core/Error.h"
30 #include "arm_compute/core/Types.h"
32 #include "src/core/CPP/Validate.h"
36 
38 
40 
41 #include <arm_neon.h>
42 #include <map>
43 #include <string>
44 
45 namespace arm_compute
46 {
47 namespace
48 {
49 
// Input passed to every selector predicate in the kernel table.
// The predicates read `data.dt` and `data.is_same_rank`; run() builds an instance from
// the output tensor's data type and the kernel's `_has_same_rank` flag.
// NOTE(review): the member declarations (listing lines 52-53) are missing from this
// extracted listing — presumably `DataType dt;` and `bool is_same_rank;`; confirm upstream.
50 struct SelectKernelSelectorData
51 {
54 };
55 
58 
// One row of the micro-kernel dispatch table.
// NOTE(review): the SelectorPtr / KernelPtr aliases (listing lines 56-57) are not visible
// in this listing; from their use, SelectorPtr is callable with a
// `const SelectKernelSelectorData &` and KernelPtr with (c, x, y, output, window).
59 struct SelectKernelSelector
60 {
// Identifier string of the implementation, e.g. "neon_s8_same_rank".
61  const char *name;
// Predicate deciding whether this row applies to the given selector data.
62  const SelectorPtr is_selected;
// Micro-kernel invoked by run(); run() asserts it is not nullptr before calling it.
63  KernelPtr ukernel;
64 };
65 
// Dispatch table of select micro-kernels.
// Each row pairs a name with a predicate over (data type, same-rank flag). The rows cover
// S8/S16/S32/U8/U16/U32/F16/F32, each in a "same_rank" and a "not_same_rank" variant.
// get_implementation() scans the rows in order and returns the first whose predicate holds;
// the predicates shown here are mutually exclusive, so at most one row matches.
// NOTE(review): the third initializer of every row (the ukernel, listing lines 71, 76, ...)
// is missing from this extracted listing — restore from the upstream file.
66 static const SelectKernelSelector available_kernels[] =
67 {
68  {
69  "neon_s8_same_rank",
70  [](const SelectKernelSelectorData & data) { return data.dt == DataType::S8 && data.is_same_rank == true; },
72  },
73  {
74  "neon_s16_same_rank",
75  [](const SelectKernelSelectorData & data) { return data.dt == DataType::S16 && data.is_same_rank == true; },
77  },
78  {
79  "neon_s32_same_rank",
80  [](const SelectKernelSelectorData & data) { return data.dt == DataType::S32 && data.is_same_rank == true; },
82  },
83  {
84  "neon_u8_same_rank",
85  [](const SelectKernelSelectorData & data) { return data.dt == DataType::U8 && data.is_same_rank == true; },
87  },
88  {
89  "neon_u16_same_rank",
90  [](const SelectKernelSelectorData & data) { return data.dt == DataType::U16 && data.is_same_rank == true; },
92  },
93  {
94  "neon_u32_same_rank",
95  [](const SelectKernelSelectorData & data) { return data.dt == DataType::U32 && data.is_same_rank == true; },
97  },
98  {
99  "neon_s8_not_same_rank",
100  [](const SelectKernelSelectorData & data) { return data.dt == DataType::S8 && data.is_same_rank == false; },
102  },
103  {
104  "neon_s16_not_same_rank",
105  [](const SelectKernelSelectorData & data) { return data.dt == DataType::S16 && data.is_same_rank == false; },
107  },
108  {
109  "neon_s32_not_same_rank",
110  [](const SelectKernelSelectorData & data) { return data.dt == DataType::S32 && data.is_same_rank == false; },
112  },
113  {
114  "neon_u8_not_same_rank",
115  [](const SelectKernelSelectorData & data) { return data.dt == DataType::U8 && data.is_same_rank == false; },
117  },
118  {
119  "neon_u16_not_same_rank",
120  [](const SelectKernelSelectorData & data) { return data.dt == DataType::U16 && data.is_same_rank == false; },
122  },
123  {
124  "neon_u32_not_same_rank",
125  [](const SelectKernelSelectorData & data) { return data.dt == DataType::U32 && data.is_same_rank == false; },
127  },
128  {
129  "neon_f16_same_rank",
130  [](const SelectKernelSelectorData & data) { return data.dt == DataType::F16 && data.is_same_rank == true; },
132  },
133  {
134  "neon_f16_not_same_rank",
135  [](const SelectKernelSelectorData & data) { return data.dt == DataType::F16 && data.is_same_rank == false; },
137  },
138  {
139  "neon_f32_same_rank",
140  [](const SelectKernelSelectorData & data) { return data.dt == DataType::F32 && data.is_same_rank == true; },
142  },
143  {
144  "neon_f32_not_same_rank",
145  [](const SelectKernelSelectorData & data) { return data.dt == DataType::F32 && data.is_same_rank == false; },
147  },
148 };
149 
// Look up the dispatch-table row matching the given selector data.
//
// data: data type and same-rank flag describing the requested kernel.
// Returns a pointer to the first row of available_kernels whose is_selected predicate
// returns true, or nullptr when no row matches. The pointer refers to the static table,
// so the caller does not own it.
150 const SelectKernelSelector *get_implementation(const SelectKernelSelectorData &data)
151 {
152  for(const auto &uk : available_kernels)
153  {
154  if(uk.is_selected(data))
155  {
156  return &uk;
157  }
158  }
// No row accepted the data type / rank combination.
159  return nullptr;
160 }
161 
162 } // namespace
163 
// Member-initializer list and body of the NESelectKernel default constructor: all tensor
// pointers start as nullptr and _has_same_rank as false; the `_function` initializer is
// commented out.
// NOTE(review): the constructor's signature line (listing line 164) is missing from this
// extracted listing — restore from the upstream file.
165  : /*_function(nullptr), */ _c(nullptr), _x(nullptr), _y(nullptr), _output(nullptr), _has_same_rank(false)
166 {
167 }
168 
// Configure the kernel with its condition, inputs and output.
//
// c:      condition tensor.
// x, y:   the two input tensors passed on to the micro-kernel.
// output: destination tensor; if its info is empty it is initialized from x's shape and
//         data type with a single channel.
//
// All four pointers are asserted non-null and the tensor infos are checked through
// validate(). The pointers are stored as-is for run(); no ownership transfer is visible here.
169 void NESelectKernel::configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output)
170 {
171  ARM_COMPUTE_ERROR_ON_NULLPTR(c, x, y, output);
172 
173  // Auto initialize output if not initialized
174  auto_init_if_empty(*output->info(), x->info()->tensor_shape(), 1, x->info()->data_type());
175  ARM_COMPUTE_ERROR_THROW_ON(validate(c->info(), x->info(), y->info(), output->info()));
176 
177  _c = c;
178  _x = x;
179  _y = y;
180  _output = output;
// "Same rank" means c and x report the same number of dimensions; run() uses this flag
// to pick between the *_same_rank and *_not_same_rank micro-kernels.
181  _has_same_rank = (c->info()->tensor_shape().num_dimensions() == x->info()->tensor_shape().num_dimensions());
182 
// The execution window is the maximum window computed from x's tensor info.
183  Window win = calculate_max_window(*x->info());
184  INEKernel::configure(win);
185 }
186 
// Check whether the given tensor infos form a valid configuration.
//
// Returns an empty Status when every visible check passes; the ARM_COMPUTE_RETURN_ERROR_ON
// checks return early with an error otherwise.
// NOTE(review): listing lines 189-194 (the leading checks) and 202-203 (the body of the
// `output` branch) are missing from this extracted listing — presumably null-pointer,
// data-type and output shape/type checks; restore from the upstream file.
187 Status NESelectKernel::validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output)
188 {
195 
196  const bool is_same_rank = (c->tensor_shape().num_dimensions() == x->tensor_shape().num_dimensions());
// Same number of dimensions: c and x must have identical shapes.
197  ARM_COMPUTE_RETURN_ERROR_ON(is_same_rank && (x->tensor_shape() != c->tensor_shape()));
// Different number of dimensions: c must have at most one dimension, and its first
// dimension must equal the size of x's last dimension.
198  ARM_COMPUTE_RETURN_ERROR_ON(!is_same_rank && ((c->tensor_shape().num_dimensions() > 1) || (c->tensor_shape().x() != x->tensor_shape()[x->tensor_shape().num_dimensions() - 1])));
199 
// Output checks run only when an output info is supplied and already has a non-zero size.
200  if(output != nullptr && output->total_size() != 0)
201  {
204  }
205 
206  return Status{};
207 }
208 
// Body of NESelectKernel::run: selects the micro-kernel matching the output data type and
// the stored same-rank flag, then invokes it on the given window.
// NOTE(review): the signature line (listing line 209) and listing lines 212-213 are missing
// from this extracted listing — restore from the upstream file.
210 {
// The thread info argument is not used by this kernel.
211  ARM_COMPUTE_UNUSED(info);
214  ARM_COMPUTE_ERROR_ON(_output == nullptr);
215  ARM_COMPUTE_ERROR_ON(_output->info() == nullptr);
216 
217  const auto *uk = get_implementation(SelectKernelSelectorData{ _output->info()->data_type(), _has_same_rank });
// NOTE(review): uk and uk->ukernel are guarded only by ARM_COMPUTE_ERROR_ON; if that macro
// is compiled out in some build configurations, an unsupported data type would reach the
// dereference below — confirm against Error.h.
218  ARM_COMPUTE_ERROR_ON(uk == nullptr);
219  ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
220  uk->ukernel(_c, _x, _y, _output, window);
221 }
222 } // namespace arm_compute
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const char * name
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
Definition: Validate.h:115
#define REGISTER_FP16_NEON(func_name)
Definition: Registrars.h:48
NESelectKernel()
Default constructor.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
1 channel, 1 U8 per channel
#define REGISTER_FP32_NEON(func_name)
Definition: Registrars.h:74
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
void neon_u32_select_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:68
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
1 channel, 1 U16 per channel
void neon_u8_select_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:60
Status class.
Definition: Error.h:52
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
decltype(strategy::transforms) typedef type
Interface for CPU tensor.
Definition: ITensor.h:36
void neon_f16_select_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Copyright (c) 2017-2022 Arm Limited.
bool is_same_rank
1 channel, 1 F16 per channel
#define REGISTER_INTEGER_NEON(func_name)
Definition: Registrars.h:165
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
1 channel, 1 S32 per channel
T x() const
Alias to access the size of the first dimension.
Definition: Dimensions.h:87
void neon_u16_select_not_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:76
void neon_s8_select_not_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:48
void neon_s16_select_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:40
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
1 channel, 1 U32 per channel
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void neon_u16_select_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:64
KernelPtr ukernel
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:915
1 channel, 1 S16 per channel
void neon_s8_select_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:36
void neon_s32_select_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:44
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Information about executing thread and CPU.
Definition: CPPTypes.h:179
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
const SelectorPtr is_selected
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Definition: Validate.h:439
unsigned int num_dimensions() const
Returns the effective dimensionality of the tensor.
Definition: Dimensions.h:143
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:541
void neon_s16_select_not_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:52
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:788
void neon_f32_select_not_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: fp32.cpp:38
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:157
void neon_s32_select_not_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:56
void neon_u32_select_not_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:80
Includes all wrapper headers at once.
static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output)
Validate the argument passed to the kernel.
DataType
Available data types.
Definition: Types.h:79
DataType dt
signed 8-bit number
void configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output)
Common signature for all the specialised elementwise functions.
Describe a multidimensional execution window.
Definition: Window.h:39
void neon_u8_select_not_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: integer.cpp:72
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:201
void neon_f16_select_not_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
void neon_f32_select_same_rank(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window)
Definition: fp32.cpp:34