Compute Library
 21.02
CpuSubKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
28 #include "src/core/CPP/Validate.h"
33 
34 namespace arm_compute
35 {
36 namespace cpu
37 {
38 namespace kernels
39 {
40 namespace
41 {
/** Input handed to every selector predicate of the micro-kernel table.
 *
 * NOTE(review): the member declarations are not visible in this listing. The selector
 * lambdas read data.dt1, data.dt2 and data.dt3 and compare them against DataType values,
 * and get_implementation() brace-initialises this struct from three DataType arguments.
 */
42 struct SubSelectorData
43 {
47 };
48 
51 
/** One row of the micro-kernel table.
 *
 * Rows are written as positional brace initialisers, so the member order below
 * is part of the table's layout.
 */
52 struct SubKernel
53 {
 // Label of the row, e.g. "sub_same_neon"
54  const char *name;
 // Predicate called with a SubSelectorData; a true result selects this row
55  const SubSelectorPtr is_selected;
 // Function called by run_op() as ukernel(src0, src1, dst, policy, window); validate_arguments() rejects a null value
56  SubKernelPtr ukernel;
57 };
58 
/** Table of subtraction micro-kernels.
 *
 * get_implementation() scans this array from the first row to the last and returns the
 * first row whose selector is true, so the order of the rows is significant.
 *
 * The F32 and F16 rows compare only dt1 and dt2; the U8, S16 and S32 "sub_same_neon"
 * rows additionally require dt3 to be the same type. The F16 row is only compiled when
 * __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined.
 *
 * NOTE(review): from "sub_u8_s16_s16_neon" onwards no ukernel line is visible in this
 * listing for any row; presumably each row registers a function in its third slot
 * - confirm against the real file.
 */
59 static const SubKernel available_kernels[] =
60 {
61  {
62  "sub_same_neon",
63  [](const SubSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F32)); },
64  REGISTER_FP32_NEON(arm_compute::cpu::sub_same_neon<float>)
65  },
66 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
67  {
68  "sub_same_neon",
69  [](const SubSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::F16)); },
70  REGISTER_FP16_NEON(arm_compute::cpu::sub_same_neon<float16_t>)
71  },
72 #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */
73  {
74  "sub_same_neon",
75  [](const SubSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::U8)); },
76  REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<uint8_t>)
77  },
78  {
79  "sub_same_neon",
80  [](const SubSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S16)); },
81  REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<int16_t>)
82  },
83  {
84  "sub_same_neon",
85  [](const SubSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == data.dt3) && (data.dt1 == DataType::S32)); },
86  REGISTER_INTEGER_NEON(arm_compute::cpu::sub_same_neon<int32_t>)
87  },
88  {
89  "sub_u8_s16_s16_neon",
90  [](const SubSelectorData & data) { return ((data.dt1 == DataType::U8) && (data.dt2 == DataType::S16)); },
92  },
93  {
94  "sub_s16_u8_s16_neon",
95  [](const SubSelectorData & data) { return ((data.dt1 == DataType::S16) && (data.dt2 == DataType::U8)); },
97  },
 // NOTE(review): the selector below never tests for U8. It only requires dt1 == dt2 and
 // dt3 == S16, so any equal input pair with an S16 destination that was not claimed by an
 // earlier row (S16/S16/S16 is claimed above) lands here, and the QASYMM8, QASYMM8_SIGNED
 // and QSYMM16 rows further down are never reached for such a combination. Confirm that
 // callers always reject those combinations before dispatching.
98  {
99  "sub_u8_u8_s16_neon",
100  [](const SubSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt3 == DataType::S16)); },
102  },
103  {
104  "sub_qasymm8_neon",
105  [](const SubSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8)); },
107  },
108  {
109  "sub_qasymm8_signed_neon",
110  [](const SubSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QASYMM8_SIGNED)); },
112  },
113  {
114  "sub_qsymm16_neon",
115  [](const SubSelectorData & data) { return ((data.dt1 == data.dt2) && (data.dt1 == DataType::QSYMM16)); },
117  },
118 };
119 
120 /** Micro-kernel selector
121  *
122  * @param[in] data Selection data passed to help pick the appropriate micro-kernel
123  *
124  * @return A matching micro-kernel else nullptr
125  */
126 const SubKernel *get_implementation(DataType dt1, DataType dt2, DataType dt3)
127 {
128  for(const auto &uk : available_kernels)
129  {
130  if(uk.is_selected({ dt1, dt2, dt3 }))
131  {
132  return &uk;
133  }
134  }
135  return nullptr;
136 }
137 
/** Check whether a src0 / src1 / dst / policy combination can be handled by the kernel.
 *
 * Checks visible in this listing, in the order they run:
 *  - a table entry with a non-null ukernel exists for the three data types;
 *  - the broadcast of the two input shapes has a non-zero total size;
 *  - (src0, src1) is one of the listed data type pairs;
 *  - the policy is not WRAP when both inputs share one of the three quantized types;
 *  - only if dst already has a non-zero total size: (src0, src1, dst) is one of the
 *    listed data type triples and the shape of dst matches the broadcast shape.
 *
 * NOTE(review): several statement heads are missing from this listing (the lines before
 * each "DataType::F32);" tail and before each long boolean expression). Presumably they
 * are ARM_COMPUTE_RETURN_ERROR_* macro invocations - confirm against the real file.
 *
 * @param[in] src0   Info of the first input
 * @param[in] src1   Info of the second input
 * @param[in] dst    Info of the destination; shape and type triple are only checked when its total size is non-zero
 * @param[in] policy Conversion policy; compared against ConvertPolicy::WRAP for quantized inputs
 *
 * @return A default-constructed Status when the end of the function is reached
 */
138 inline Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, ConvertPolicy policy)
139 {
 // NOTE(review): policy is marked unused here although it is read further down in the WRAP check.
140  ARM_COMPUTE_UNUSED(policy);
 // Tails of three statements whose opening lines are not visible in this listing.
143  DataType::F32);
145  DataType::F32);
147  DataType::F32);
148 
 // A table entry must exist for this type combination and must carry a function.
149  const auto *uk = get_implementation(src0.data_type(), src1.data_type(), dst.data_type());
150  ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
151 
152  const TensorShape out_shape = TensorShape::broadcast_shape(src0.tensor_shape(), src1.tensor_shape());
153  ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
154 
 // Condition is true when (src0, src1) matches none of the listed pairs.
 // NOTE(review): the U8/U8 clause appears twice in this list (first and fifth clause).
156  !(src0.data_type() == DataType::U8 && src1.data_type() == DataType::U8)
157  && !(src0.data_type() == DataType::QASYMM8 && src1.data_type() == DataType::QASYMM8)
158  && !(src0.data_type() == DataType::QASYMM8_SIGNED && src1.data_type() == DataType::QASYMM8_SIGNED)
159  && !(src0.data_type() == DataType::QSYMM16 && src1.data_type() == DataType::QSYMM16)
160  && !(src0.data_type() == DataType::U8 && src1.data_type() == DataType::U8)
161  && !(src0.data_type() == DataType::U8 && src1.data_type() == DataType::S16)
162  && !(src0.data_type() == DataType::S16 && src1.data_type() == DataType::U8)
163  && !(src0.data_type() == DataType::S16 && src1.data_type() == DataType::S16)
164  && !(src0.data_type() == DataType::S32 && src1.data_type() == DataType::S32)
165  && !(src0.data_type() == DataType::F32 && src1.data_type() == DataType::F32)
166  && !(src0.data_type() == DataType::F16 && src1.data_type() == DataType::F16),
167  "You called subtract with the wrong image formats");
168 
 // Condition is true when both inputs share a quantized type and the policy is WRAP.
170  (src0.data_type() == DataType::QASYMM8_SIGNED && src1.data_type() == DataType::QASYMM8_SIGNED && policy == ConvertPolicy::WRAP)
171  || (src0.data_type() == DataType::QASYMM8 && src1.data_type() == DataType::QASYMM8 && policy == ConvertPolicy::WRAP)
172  || (src0.data_type() == DataType::QSYMM16 && src1.data_type() == DataType::QSYMM16 && policy == ConvertPolicy::WRAP),
173  "Convert policy cannot be WRAP if datatype is quantized");
174 
175  // Validate in case of configured dst
176  if(dst.total_size() > 0)
177  {
 // Condition is true when (src0, src1, dst) matches none of the listed triples.
179  !(src0.data_type() == DataType::U8 && src1.data_type() == DataType::U8 && dst.data_type() == DataType::U8)
180  && !(src0.data_type() == DataType::QASYMM8 && src1.data_type() == DataType::QASYMM8 && dst.data_type() == DataType::QASYMM8)
181  && !(src0.data_type() == DataType::QASYMM8_SIGNED && src1.data_type() == DataType::QASYMM8_SIGNED && dst.data_type() == DataType::QASYMM8_SIGNED)
182  && !(src0.data_type() == DataType::QSYMM16 && src1.data_type() == DataType::QSYMM16 && dst.data_type() == DataType::QSYMM16)
183  && !(src0.data_type() == DataType::U8 && src1.data_type() == DataType::U8 && dst.data_type() == DataType::S16)
184  && !(src0.data_type() == DataType::U8 && src1.data_type() == DataType::S16 && dst.data_type() == DataType::S16)
185  && !(src0.data_type() == DataType::S16 && src1.data_type() == DataType::U8 && dst.data_type() == DataType::S16)
186  && !(src0.data_type() == DataType::S16 && src1.data_type() == DataType::S16 && dst.data_type() == DataType::S16)
187  && !(src0.data_type() == DataType::S32 && src1.data_type() == DataType::S32 && dst.data_type() == DataType::S32)
188  && !(src0.data_type() == DataType::F32 && src1.data_type() == DataType::F32 && dst.data_type() == DataType::F32)
189  && !(src0.data_type() == DataType::F16 && src1.data_type() == DataType::F16 && dst.data_type() == DataType::F16),
190  "You called subtract with the wrong image formats");
191 
192  ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst.tensor_shape(), 0),
193  "Wrong shape for dst");
194  }
195  return Status{};
196 }
197 } // namespace
198 
// Body of the kernel's configure step: validates the arguments, gives dst the broadcast
// shape if it has none, records the policy and configures the execution window.
// NOTE(review): the function header is not visible in this listing; the body uses
// src0, src1, dst and policy and writes the member _policy.
200 {
201  ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst);
202  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy));
203 
 // Broadcast shape and valid region are computed together from the two inputs.
204  const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*src0, *src1);
205  const TensorShape &out_shape = broadcast_pair.first;
206  const ValidRegion &valid_region = broadcast_pair.second;
207 
208  // Auto initialize dst if not initialized
 // NOTE(review): only the shape is assigned in the visible body; no data type is set on dst here.
209  set_shape_if_empty(*dst, out_shape);
210 
211  _policy = policy;
212 
213  // CpuSubKernel doesn't need padding so update_window_and_padding() can be skipped
 // NOTE(review): coord is sized but never read afterwards in the visible body - looks like a leftover local.
214  Coordinates coord;
215  coord.set_num_dimensions(dst->num_dimensions());
216  dst->set_valid_region(valid_region);
217  Window win = calculate_max_window(valid_region, Steps());
218 
219  ICpuKernel::configure(win);
220 }
221 
// Body of the static validation entry point: returns an error for null pointers or when
// validate_arguments() reports one, otherwise a default-constructed Status.
// NOTE(review): the function header is not visible in this listing; the body uses
// src0, src1, dst and policy.
223 {
224  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst);
225  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst, policy));
226 
227  return Status{};
228 }
229 
/** Run the subtraction on the given window.
 *
 * Fetches the two source tensors from the pack, picks a table entry from the data types
 * of src0, src1 and dst, and calls its ukernel with the stored policy and the window.
 *
 * NOTE(review): some lines of this function are missing from the listing; in particular
 * the declaration of dst, which is used below, is not visible.
 *
 * @param[in] tensors Pack queried for ACL_SRC_0 and ACL_SRC_1
 * @param[in] window  Window forwarded unchanged to the micro-kernel
 * @param[in] info    Not used by the visible body
 */
230 void CpuSubKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
231 {
232  ARM_COMPUTE_UNUSED(info);
235 
236  const ITensor *src0 = tensors.get_const_tensor(TensorType::ACL_SRC_0);
237  const ITensor *src1 = tensors.get_const_tensor(TensorType::ACL_SRC_1);
239 
240  // Dispatch kernel
 // NOTE(review): uk is dereferenced without a null check; get_implementation() returns
 // nullptr when no row matches, so this relies on the combination having been validated
 // beforehand - confirm.
241  const auto *uk = get_implementation(src0->info()->data_type(), src1->info()->data_type(), dst->info()->data_type());
242  uk->ukernel(src0, src1, dst, _policy, window);
243 }
244 
245 const char *CpuSubKernel::name() const
246 {
247  return "CpuSubKernel";
248 }
249 } // namespace kernels
250 } // namespace cpu
251 } // namespace arm_compute
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
Shape of a tensor.
Definition: TensorShape.h:39
const SubSelectorPtr is_selected
quantized, symmetric fixed-point 16-bit number
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
Definition: Validate.h:108
#define REGISTER_FP16_NEON(func_name)
Definition: Registrars.h:42
1 channel, 1 U8 per channel
#define REGISTER_FP32_NEON(func_name)
Definition: Registrars.h:52
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
static TensorShape broadcast_shape(const Shapes &... shapes)
If shapes are broadcast compatible, return the broadcasted shape.
Definition: TensorShape.h:211
#define REGISTER_QASYMM8_SIGNED_NEON(func_name)
Definition: Registrars.h:62
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Status class.
Definition: Error.h:52
static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy)
Static function to check if given info will lead to a valid configuration of CpuSubKernel.
DataType dt1
const char * name
const ValidRegion valid_region
Definition: Scale.cpp:221
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
static std::pair< TensorShape, ValidRegion > broadcast_shape_and_valid_region(const Infos &... infos)
If infos are broadcast compatible tensor info's, return the broadcasted shape and the intersection of...
Definition: ITensorInfo.h:271
decltype(strategy::transforms) typedef type
Interface for Neon tensor.
Definition: ITensor.h:36
Copyright (c) 2017-2021 Arm Limited.
virtual void set_valid_region(const ValidRegion &valid_region)=0
Set the valid region of the tensor.
1 channel, 1 F16 per channel
#define REGISTER_INTEGER_NEON(func_name)
Definition: Registrars.h:92
const char * name() const override
Name of the kernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
DataType dt3
1 channel, 1 S32 per channel
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:40
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
#define REGISTER_QASYMM8_NEON(func_name)
Definition: Registrars.h:72
void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, ConvertPolicy policy)
Initialise the kernel's src and dst.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
Definition: Steps.h:40
Coordinates of an item.
Definition: Coordinates.h:37
#define REGISTER_QSYMM16_NEON(func_name)
Definition: Registrars.h:82
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape)
Set the shape to the specified value if the current assignment is empty.
bool have_different_dimensions(const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim)
Definition: Validate.h:51
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941
1 channel, 1 S16 per channel
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
void sub_qasymm8_signed_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
void sub_u8_u8_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
Definition: integer.cpp:117
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:50
void sub_qsymm16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
Definition: qsymm16.cpp:35
Information about executing thread and CPU.
Definition: CPPTypes.h:235
void sub_qasymm8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
Definition: qasymm8.cpp:35
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
SubKernelPtr ukernel
Tensor packing service.
Definition: ITensorPack.h:37
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
void sub_u8_s16_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
Definition: integer.cpp:111
void set_num_dimensions(size_t num_dimensions)
Set number of dimensions.
Definition: Dimensions.h:149
quantized, asymmetric fixed-point 8-bit number signed
Container for valid region of a window.
Definition: Types.h:188
void sub_s16_u8_s16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window)
Definition: integer.cpp:106
DataType
Available data types.
Definition: Types.h:77
Describe a multidimensional execution window.
Definition: Window.h:39
ConvertPolicy
Policy to handle overflow.
Definition: Types.h:385
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205
DataType dt2