24.02.1
|
Go to the documentation of this file.
54 if (output->total_size() != 0)
75 _split_dimension(
Window::DimY)
83 input->info()->tensor_shape(),
input->info()->data_layout(), block_shape);
92 _block_shape = block_shape;
93 _data_layout =
input->info()->data_layout();
95 constexpr
size_t dim_b = 3;
104 steps.
set(dim_h, block_shape);
105 steps.
set(dim_w, block_shape);
109 ICPPKernel::configure(win);
111 const auto num_batches =
input->info()->tensor_shape().total_size_upper(dim_b);
114 _split_dimension = dim_b;
118 _split_dimension = dim_h;
131 return _split_dimension;
143 const auto element_size =
input_info->element_size();
144 const auto &input_strides =
input_info->strides_in_bytes();
145 const auto &output_strides =
output_info->strides_in_bytes();
149 const uintptr_t k_input_strides[] = {input_strides[0], input_strides[1], input_strides[2], input_strides[3]};
150 const uintptr_t k_output_strides[] = {output_strides[0], output_strides[1], output_strides[2], output_strides[3]};
152 const uint8_t *k_input_ptr = _input->
buffer();
153 uint8_t *k_output_ptr =
155 window[3].start() * output_strides[3] +
156 window[2].start() * output_strides[2] +
157 window[1].start() * output_strides[1] +
158 window[0].start() * output_strides[0];
163 "The window cannot be splitted in channel dimension");
165 const uintptr_t k_input_shape[] = {
172 k_input_ptr +=
window[3].start() * input_strides[3] +
173 window[2].start() * _block_shape * _block_shape * input_strides[2] +
174 (
window[1].start() / _block_shape) * input_strides[1] +
175 (
window[0].start() / _block_shape) * input_strides[0];
178 k_input_ptr, k_output_ptr,
179 k_input_shape, k_input_strides, k_output_strides,
180 element_size, _block_shape);
185 "The window cannot be splitted in channel dimension");
187 const uintptr_t k_input_shape[] = {
194 k_input_ptr +=
window[3].start() * input_strides[3] +
195 (
window[2].start() / _block_shape) * input_strides[2] +
196 (
window[1].start() / _block_shape) * input_strides[1] +
197 window[0].start() * _block_shape * _block_shape * input_strides[0];
200 k_input_ptr, k_output_ptr,
201 k_input_shape, k_input_strides, k_output_strides,
202 element_size, _block_shape);
@ NCHW
Num samples, channels, height, width.
Class to describe a number of elements in each dimension.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
TensorShape compute_depth_to_space_shape(const TensorShape &input_shape, DataLayout data_layout, int block)
Calculate the depth to space output shape of a tensor.
void set(size_t dimension, T value, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
DataLayout
[DataLayout enum definition]
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
void depth_to_space_nchw_any(const uint8_t *src, uint8_t *dst, const uintptr_t src_shape[4], const uintptr_t src_strides[4], const uintptr_t dst_strides[4], uintptr_t element_size, uintptr_t block_size)
constexpr size_t num_iterations(size_t dimension) const
Return the number of iterations needed to iterate through a given dimension.
Interface for CPU tensor.
void depth_to_space_nhwc_any(const uint8_t *src, uint8_t *dst, const uintptr_t src_shape[4], const uintptr_t src_strides[4], const uintptr_t dst_strides[4], uintptr_t element_size, uintptr_t block_size)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
Static function to check if given info will lead to a valid configuration of NEDepthToSpaceLayerKernel.
constexpr auto data_layout
void configure(const ITensor *input, ITensor *output, int32_t block_shape)
Initialise the kernel's inputs and output.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
size_t get_split_dimension() const
Get the dimension the scheduler should use to split.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
TensorShape input_shape
Validate test suite is to test ARM_COMPUTE_RETURN_ON_* macros we use to check the validity of given arguments.
const Window & window() const
The maximum window the kernel can be executed on.
Information about executing thread and CPU.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
Describe a multidimensional execution window.
Copyright (c) 2017-2024 Arm Limited.
NEDepthToSpaceLayerKernel()
Default constructor.
@ UNKNOWN
Unknown CL kernel type.
void end(TokenStream &in, bool &valid)
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
@ UNKNOWN
Unknown data type.
virtual uint8_t * buffer() const =0
Interface to be implemented by the child class to return a pointer to CPU memory.