Compute Library
 23.11
CpuReshapeKernel.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
26 #include "arm_compute/core/Error.h"
30 #include "arm_compute/core/Types.h"
32 
33 #include "src/core/helpers/Utils.h"
36 
37 #include <cstdint>
38 
39 /** [NEReshapeLayerKernel Kernel] **/
40 namespace arm_compute
41 {
42 namespace cpu
43 {
44 namespace kernels
45 {
46 namespace
47 {
/** Validate a source/destination pair for the reshape kernel.
 *
 * In the lines visible here the only check is on element counts: when dst
 * already reports a non-zero total size, src and dst must hold the same total
 * number of elements, otherwise ARM_COMPUTE_RETURN_ERROR_ON returns an error.
 *
 * NOTE(review): the embedded listing numbers skip 50, 52, 56 and 57, so this
 * extract has dropped statements at those positions - confirm against the
 * original file before treating this description as complete.
 *
 * @param[in] src Source tensor info.
 * @param[in] dst Destination tensor info.
 *
 * @return A default-constructed Status when no visible check fails.
 */
48 Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
49 {
51  // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src) is not needed here as this kernel doesn't use CPU FP16 instructions.
53 
    // A zero total size presumably means dst has not been initialised yet, in
    // which case there is nothing to compare against - TODO confirm.
54  if (dst->tensor_shape().total_size() != 0)
55  {
    // A reshape only re-arranges elements, so the element counts must agree.
58  ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() != dst->tensor_shape().total_size());
59  }
60 
61  return Status{};
62 }
63 
/** Copy a tensor one element at a time, remapping coordinates between shapes.
 *
 * For every destination coordinate the lambda computes the linear index of
 * that coordinate in the destination shape, converts the index back into a
 * coordinate of the source shape, and copies a single value of type T.
 *
 * NOTE(review): listing line 72 is absent from this extract. The argument
 * list that follows (window, lambda, iterator) matches the shape of
 * execute_window_loop as listed in the cross-reference index - confirm.
 *
 * @tparam T Type used for the copy; only its size matters to the assignment.
 *
 * @param[in]  window Region of the destination to process.
 * @param[in]  src    Source tensor.
 * @param[out] dst    Destination tensor.
 */
64 template <typename T>
65 void reshape_tensor_per_element(const Window &window, const ITensor *src, ITensor *dst)
66 {
67  const TensorShape &src_shape = src->info()->tensor_shape();
68  const TensorShape &dst_shape = dst->info()->tensor_shape();
69 
70  Iterator dst_it(dst, window);
71 
73  window,
74  [&](const Coordinates &dst_coord)
75  {
    // dst coordinate -> linear index (in dst_shape) -> src coordinate (in src_shape).
76  Coordinates src_coord = index2coords(src_shape, coords2index(dst_shape, dst_coord));
    // Addresses are looked up per coordinate on each tensor rather than read from dst_it.
77  const auto output_ptr = dst->ptr_to_element(dst_coord);
78  const auto input_ptr = src->ptr_to_element(src_coord);
79 
    // Plain value copy of one T-sized element.
80  *reinterpret_cast<T *>(output_ptr) = *reinterpret_cast<T *>(input_ptr);
81  },
82  dst_it);
83 }
84 
/** Dispatch the element-wise copy on the source data type.
 *
 * Data types are grouped by element width and forwarded to
 * reshape_tensor_per_element instantiated with an unsigned integer of the
 * same width (uint8_t, uint16_t, uint32_t or uint64_t). The copy is a plain
 * assignment, so the signedness or float/integer nature of the data type
 * does not affect the result.
 *
 * NOTE(review): listing lines 93 and 94 are absent from this extract, so the
 * 8-bit group may contain further case labels not shown here - confirm
 * against the original file.
 *
 * @param[in]  window Region of the destination to process.
 * @param[in]  src    Source tensor; its data type selects the copy width.
 * @param[out] dst    Destination tensor.
 */
85 void reshape_tensor_per_element_selector(const Window &window, const ITensor *src, ITensor *dst)
86 {
87  switch (src->info()->data_type())
88  {
    // 8-bit element types.
89  case DataType::U8:
90  case DataType::S8:
91  case DataType::QSYMM8:
92  case DataType::QASYMM8:
95  reshape_tensor_per_element<uint8_t>(window, src, dst);
96  break;
    // 16-bit element types.
97  case DataType::U16:
98  case DataType::S16:
99  case DataType::F16:
100  reshape_tensor_per_element<uint16_t>(window, src, dst);
101  break;
    // 32-bit element types.
102  case DataType::U32:
103  case DataType::S32:
104  case DataType::F32:
105  reshape_tensor_per_element<uint32_t>(window, src, dst);
106  break;
    // 64-bit element types.
107  case DataType::U64:
108  case DataType::S64:
109  case DataType::F64:
110  reshape_tensor_per_element<uint64_t>(window, src, dst);
111  break;
    // Any other data type is reported through ARM_COMPUTE_ERROR.
112  default:
113  ARM_COMPUTE_ERROR("Unsupported data type!");
114  }
115 }
116 
/** Copy a tensor in chunks of one source row using memcpy.
 *
 * The X dimension of the window is collapsed to a single step, and inside the
 * lambda an inner loop walks the original X range in strides of the source
 * row length. Each iteration remaps the destination coordinate to a source
 * coordinate through the linear index and copies src_shape[0] elements.
 *
 * NOTE(review): listing line 137 is absent from this extract. The argument
 * list that follows (win, lambda, iterator) matches the shape of
 * execute_window_loop as listed in the cross-reference index - confirm.
 * NOTE(review): std::memcpy is declared in <cstring>; no such include is
 * visible in this extract (the include list itself has dropped lines) -
 * verify it is reachable in the original file.
 *
 * @param[in]  window Region of the destination to process.
 * @param[in]  src    Source tensor.
 * @param[out] dst    Destination tensor.
 */
117 void reshape_tensor_per_row(const Window &window, const ITensor *src, ITensor *dst)
118 {
119  const TensorShape &src_shape = src->info()->tensor_shape();
120  const TensorShape &dst_shape = dst->info()->tensor_shape();
121  Coordinates src_coord{};
122  Coordinates dst_coord{};
123 
124  const auto element_size = dst->info()->element_size();
125  const auto window_start_x = static_cast<int>(window.x().start());
126  const auto window_end_x = static_cast<int>(window.x().end());
127  const auto src_row_size = static_cast<int>(src_shape[0]);
    // Bytes copied per memcpy: one full source row.
128  const auto row_size_in_bytes = src_row_size * element_size;
129 
    // NOTE(review): these two initial values are overwritten inside the loop
    // before they are ever read, so the lookups here have no visible effect.
130  auto output_ptr = dst->ptr_to_element(dst_coord);
131  auto input_ptr = src->ptr_to_element(src_coord);
132 
    // Collapse X so the lambda is presumably entered once per row of the
    // remaining dimensions; the X range is walked by the inner loop instead.
133  Window win = window;
134  win.set(Window::DimX, Window::Dimension(0, 1, 1));
135 
136  Iterator dst_it(dst, win);
138  win,
139  [&](Coordinates &id)
140  {
141  dst_coord = id;
142 
    // The bounds come from the caller's original window, not from win.
143  for (int x = window_start_x; x < window_end_x; x += src_row_size)
144  {
    // dst coordinate -> linear index (in dst_shape) -> src coordinate (in src_shape).
145  src_coord = index2coords(src_shape, coords2index(dst_shape, dst_coord));
146  output_ptr = dst->ptr_to_element(dst_coord);
147  input_ptr = src->ptr_to_element(src_coord);
148 
149  std::memcpy(output_ptr, input_ptr, row_size_in_bytes);
150 
    // Advance the destination coordinate by the same stride as the loop counter.
151  dst_coord.increment(Window::DimX, src_row_size);
152  }
153  },
154  dst_it);
155 }
156 
/** Copy the whole window with a single memcpy.
 *
 * Only the X extent of the window is used to size the copy, and the source
 * and destination addresses are the iterators' initial positions. No window
 * loop is executed, so this is only suitable when the window has been
 * squashed to one dimension over contiguous memory - the comment in
 * CpuReshapeKernel::prepare() states this is when the function is selected.
 *
 * @param[in]  window Region to copy; only window.x() is read.
 * @param[in]  src    Source tensor.
 * @param[out] dst    Destination tensor.
 */
157 void reshape_tensor_per_window(const Window &window, const ITensor *src, ITensor *dst)
158 {
159  Iterator src_it(src, window);
160  Iterator dst_it(dst, window);
161 
162  const size_t element_size = dst->info()->element_size();
    // Number of elements along X, then the same quantity in bytes.
163  const auto window_size = window.x().end() - window.x().start();
164  const auto window_size_in_bytes = window_size * element_size;
165 
    // The iterators are never advanced, so these are their starting addresses.
166  const auto input_ptr = src_it.ptr();
167  const auto output_ptr = dst_it.ptr();
168 
169  std::memcpy(output_ptr, input_ptr, window_size_in_bytes);
170 }
171 } // namespace
172 
// Body of CpuReshapeKernel::configure.
// NOTE(review): the signature (listing line 173) is absent from this extract;
// the cross-reference index gives it as
// `void configure(const ITensorInfo *src, ITensorInfo *dst)`.
// Listing lines 175-177 and 181 are also absent; `win` is presumably declared
// on line 181 - confirm against the original file.
174 {
178 
    // Start with the element-wise copy as the selected function; prepare()
    // assigns _reshape_tensor_fn again based on the tensors it is given.
179  _reshape_tensor_fn = reshape_tensor_per_element_selector;
180  // Configure kernel window
182 
183  ICpuKernel::configure(win);
184 }
185 
// Body of CpuReshapeKernel::validate.
// NOTE(review): the signature (listing line 186) is absent from this extract;
// the cross-reference index gives it as
// `static Status validate(const ITensorInfo *src, const ITensorInfo *dst)`.
// Listing line 188 is also absent, so whatever check it performs is not
// visible here - confirm against the original file.
187 {
    // Reached only if the statement on the missing line did not return early.
189  return Status{};
190 }
191 
/** Run the selected reshape function on the given window.
 *
 * Fetches the ACL_SRC and ACL_DST tensors from the pack and forwards them,
 * with the window, to the function stored in _reshape_tensor_fn.
 *
 * NOTE(review): listing lines 194-196 are absent from this extract; `info`
 * is not used in the visible lines, so any handling of it (and any argument
 * checks) would be on those lines - confirm against the original file.
 *
 * @param[in] tensors Pack holding the source and destination tensors.
 * @param[in] window  Region to execute on.
 * @param[in] info    Thread information; unused in the visible lines.
 */
192 void CpuReshapeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
193 {
197 
198  const auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
199  auto dst = tensors.get_tensor(TensorType::ACL_DST);
    // Dispatch to whichever copy strategy was assigned by configure()/prepare().
200  _reshape_tensor_fn(window, src, dst);
201 }
202 
/** Name of the kernel.
 *
 * @return The fixed string "CpuReshapeKernel".
 */
203 const char *CpuReshapeKernel::name() const
204 {
205  return "CpuReshapeKernel";
206 }
207 
/** Return the minimum workload size for this kernel.
 *
 * Both parameters are explicitly marked unused, so the result does not
 * depend on them.
 *
 * NOTE(review): listing line 213, which should hold the return statement, is
 * absent from this extract. The cross-reference index lists
 * ICPPKernel::default_mws as referenced by this file, which is presumably the
 * value returned - confirm against the original file.
 *
 * @param[in] platform     CPU information; unused.
 * @param[in] thread_count Number of threads; unused.
 */
208 size_t CpuReshapeKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
209 {
210  ARM_COMPUTE_UNUSED(thread_count);
211  ARM_COMPUTE_UNUSED(platform);
212 
214 }
215 
// Body of CpuReshapeKernel::prepare.
// NOTE(review): the signature (listing line 216) is absent from this extract;
// the cross-reference index gives it as `void prepare(ITensorPack &tensors)`.
//
// Chooses the copy strategy and execution window from the actual tensors:
//   - no holes in src and dst        -> reshape_tensor_per_window
//   - no holes in X and equal rows   -> reshape_tensor_per_row
//   - anything else                  -> reshape_tensor_per_element_selector
217 {
218  const auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
219  auto dst = tensors.get_tensor(TensorType::ACL_DST);
220 
221  const ITensorInfo *src_info = src->info();
222  const ITensorInfo *dst_info = dst->info();
223 
224  // Calculate kernel window based on the padding info
225  Window win;
226 
    // has_holes is queried twice per tensor: with the last dimension index and
    // with DimX only. Presumably the second argument limits which dimensions
    // are inspected - confirm against Utils.h.
    // NOTE(review): num_dimensions() returns size_t, so `- 1` wraps around if
    // a tensor ever reports zero dimensions - verify that cannot happen here.
227  const bool src_has_holes = has_holes(*src_info, src_info->num_dimensions() - 1);
228  const bool dst_has_holes = has_holes(*dst_info, dst_info->num_dimensions() - 1);
229  const bool src_has_holes_in_x = has_holes(*src_info, Window::DimX);
230  const bool dst_has_holes_in_x = has_holes(*dst_info, Window::DimX);
231  const auto src_row_size = static_cast<int>(src_info->tensor_shape()[0]);
232  const auto dst_row_size = static_cast<int>(dst_info->tensor_shape()[0]);
233 
234  if (!src_has_holes && !dst_has_holes)
235  {
    // The helper returns the window together with a split dimension, which is
    // stored in the _split_dimension member.
236  std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*dst_info);
237  /*
238  Copy the tensor per window. If the src and dst tensors
239  are contiguous memory allocations without any holes or
240  padding, then the tensor is squashed to 1D window and
241  we can use a single memcpy call to copy the whole
242  window in reshape_tensor_per_window fn
243  */
244  _reshape_tensor_fn = reshape_tensor_per_window;
245  }
246  else
247  {
248  win = calculate_max_window(*dst_info);
249  /*
250  Copy tensor row by row if src and dst have no holes in X
251  dim and they have the same number of elements in their rows
252  */
253  if (!src_has_holes_in_x && !dst_has_holes_in_x && (src_row_size == dst_row_size))
254  {
255  _reshape_tensor_fn = reshape_tensor_per_row;
256  }
257  else
258  {
259  /*
260  Fall back to the element wise copy
261  */
262  _reshape_tensor_fn = reshape_tensor_per_element_selector;
263  }
264  }
265 
    // NOTE(review): configure() calls ICpuKernel::configure while this call
    // goes through ICPPKernel::configure - presumably the same base
    // implementation; verify the difference is intentional.
266  ICPPKernel::configure(win);
267 }
268 } // namespace kernels
269 } // namespace cpu
270 } // namespace arm_compute
271 /** [NEReshapeLayerKernel Kernel] **/
arm_compute::test::validation::dst_shape
TensorShape dst_shape
Definition: DFT.cpp:164
arm_compute::DataType::QSYMM8_PER_CHANNEL
@ QSYMM8_PER_CHANNEL
quantized, symmetric per channel fixed-point 8-bit number
arm_compute::DataType::U64
@ U64
unsigned 64-bit number
arm_compute::test::validation::src
SimpleTensor< float > src
Definition: DFT.cpp:155
arm_compute::ITensorInfo::tensor_shape
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Helpers.h
arm_compute::DataType::F64
@ F64
64-bit floating-point number
arm_compute::calculate_max_window
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Definition: WindowHelpers.cpp:29
arm_compute::cpu::kernels::CpuReshapeKernel::get_mws
size_t get_mws(const CPUInfo &platform, size_t thread_count) const override
Return minimum workload size of the relevant kernel.
Definition: CpuReshapeKernel.cpp:208
arm_compute::DataType::QASYMM8
@ QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned
arm_compute::DataType::U16
@ U16
unsigned 16-bit number
arm_compute::test::validation::dst
auto dst
Definition: DFT.cpp:170
arm_compute::cpu::kernels::validate_arguments
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
Definition: CpuDirectConv2dKernel.cpp:57
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:1079
arm_compute::DataType::QSYMM8
@ QSYMM8
quantized, symmetric fixed-point 8-bit number
Types.h
arm_compute::Window::DimX
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43
arm_compute::cpu::kernels::CpuReshapeKernel::validate
static Status validate(const ITensorInfo *src, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration.
Definition: CpuReshapeKernel.cpp:186
ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:354
arm_compute::index2coords
Coordinates index2coords(const TensorShape &shape, int index)
Convert a linear index into n-dimensional coordinates.
Definition: Helpers.inl:164
TensorInfo.h
arm_compute::ITensorPack::get_tensor
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:63
Error.h
arm_compute::DataType::S8
@ S8
signed 8-bit number
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:677
arm_compute::CPUInfo
Definition: CPPTypes.h:66
ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:205
CpuReshapeKernel.h
arm_compute::TensorInfo::num_dimensions
size_t num_dimensions() const override
The number of dimensions of the tensor (rank)
Definition: TensorInfo.h:237
ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:159
arm_compute::cpu::kernels::CpuReshapeKernel::name
const char * name() const override
Name of the kernel.
Definition: CpuReshapeKernel.cpp:203
arm_compute::ITensorPack::get_const_tensor
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:53
ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
arm_compute::DataType::U32
@ U32
unsigned 32-bit number
arm_compute::ITensorPack
Tensor packing service.
Definition: ITensorPack.h:39
arm_compute::execute_window_loop
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_funct...
Definition: Helpers.inl:74
ARM_COMPUTE_RETURN_ERROR_ON
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:298
arm_compute::ACL_DST
@ ACL_DST
Definition: Types.h:55
arm_compute::cpu::kernels::CpuReshapeKernel::prepare
void prepare(ITensorPack &tensors)
Prepare the reshape kernel for execution (Only executed once) by calculating max or squashed window a...
Definition: CpuReshapeKernel.cpp:216
arm_compute::DataType::U8
@ U8
unsigned 8-bit number
arm_compute::DataType::S16
@ S16
signed 16-bit number
arm_compute::Status
Status class.
Definition: Error.h:52
arm_compute::DataType::QASYMM8_SIGNED
@ QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed
WindowHelpers.h
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:203
ARM_COMPUTE_UNUSED
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:151
INEKernel.h
arm_compute::IKernel::window
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
arm_compute::cpu::kernels::CpuReshapeKernel::run_op
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Definition: CpuReshapeKernel.cpp:192
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...)
Definition: Validate.h:753
arm_compute::ThreadInfo
Information about executing thread and CPU.
Definition: CPPTypes.h:180
arm_compute::coords2index
int coords2index(const TensorShape &shape, const Coordinates &coord)
Convert n-dimensional coordinates into a linear index.
Definition: Helpers.inl:183
arm_compute::cpu::kernels::CpuReshapeKernel::configure
void configure(const ITensorInfo *src, ITensorInfo *dst)
Configure kernel for a given list of arguments.
Definition: CpuReshapeKernel.cpp:173
arm_compute::Window
Describe a multidimensional execution window.
Definition: Window.h:39
Utils.h
arm_compute::DataType::S64
@ S64
signed 64-bit number
arm_compute::calculate_squashed_or_max_window
std::pair< Window, size_t > calculate_squashed_or_max_window(const ITensorInfo &src0, const ITensorInfo &src1)
Definition: WindowHelpers.cpp:256
arm_compute
Copyright (c) 2017-2023 Arm Limited.
Definition: introduction.dox:24
arm_compute::DataType::F16
@ F16
16-bit floating-point number
arm_compute::DataType::S32
@ S32
signed 32-bit number
arm_compute::test::validation::src_info
TensorInfo src_info(src_shape, 1, data_type)
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
arm_compute::ACL_SRC
@ ACL_SRC
Definition: Types.h:44
arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:44
arm_compute::DataType::F32
@ F32
32-bit floating-point number
ITensor.h
arm_compute::test::validation::info
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
arm_compute::DataType::UNKNOWN
@ UNKNOWN
Unknown data type.
arm_compute::has_holes
bool has_holes(const ITensorInfo &info)
Check if the tensor has any holes.
Definition: Utils.cpp:28
Validate.h
arm_compute::ICPPKernel::default_mws
static constexpr size_t default_mws
Definition: ICPPKernel.h:41
arm_compute::TensorInfo::tensor_shape
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
Definition: TensorInfo.h:245
arm_compute::ITensorInfo::num_dimensions
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)