76 _height_offset = height_offset;
80 ICpuKernel::configure(win);
99 uint8_t *dst_ptr =
dst->buffer() +
dst->info()->offset_first_element_in_bytes() + _height_offset *
dst->info()->strides_in_bytes()[
Window::DimY];
101 const auto window_start_x = static_cast<int>(
window.
x().
start());
102 const auto window_end_x = static_cast<int>(
window.
x().
end()) * static_cast<int>(
dst->info()->element_size());
103 const int window_step_x = 16;
120 int x = window_start_x;
121 for(; x <= (window_end_x - window_step_x); x += window_step_x)
127 for(; x < window_end_x; ++x)
139 int x = window_start_x;
140 for(; x <= (window_end_x - window_step_x); x += window_step_x)
142 vst1q_s8(reinterpret_cast<int8_t *>(dst_ptr + dst_it.
offset() + x),
147 for(; x < window_end_x; ++x)
158 const auto in_ptr = src_it.
ptr();
159 const auto out_ptr = dst_ptr + dst_it.
offset();
161 int x = window_start_x;
162 for(; x <= (window_end_x - window_step_x); x += window_step_x)
168 for(; x < window_end_x; ++x)
170 *(out_ptr + x) = *(in_ptr + x);
179 return "CpuConcatenateHeightKernel";
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
const Window & window() const
The maximum window the kernel can be executed on.
float dequantize_qasymm8(uint8_t value, const INFO_TYPE &qinfo)
Dequantize a value given an unsigned 8-bit asymmetric quantization scheme.
uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given an unsigned 8-bit asymmetric quantization scheme.
float32x4x2_t vdequantize(const uint8x8_t &qv, const UniformQuantizationInfo &qi)
Dequantize a neon vector holding 8 quantized values.
uint8x16_t vloadq(const uint8_t *ptr)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Store the tensor's metadata.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Describe one of the image's dimensions with a start, end and step.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
const char * name() const override
Name of the kernel.
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy=RoundingPolicy::TO_NEAREST_UP)
Quantize a value given a signed 8-bit asymmetric quantization scheme.
quantized, asymmetric fixed-point 8-bit number unsigned
Class to describe a number of elements in each dimension.
constexpr uint8_t * ptr() const
Return a pointer to the current pixel.
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Information about executing thread and CPU.
static Status validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst)
Static function to check if given info will lead to a valid configuration of CpuConcatenateHeightKernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
void configure(const ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst)
Configure kernel for a given list of arguments.
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
CpuConcatenateHeightKernel()
uint8x8_t vquantize(const float32x4x2_t &qv, const UniformQuantizationInfo &qi)
Quantize a neon vector holding 8 floating point values.
void vstore(uint8_t *ptr, uint8x8_t val)
float dequantize_qasymm8_signed(int8_t value, const INFO_TYPE &qinfo)
Dequantize a value given a signed 8-bit asymmetric quantization scheme.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators)
Iterate through the passed window, automatically adjusting the iterators and calling the lambda_function for each element.
quantized, asymmetric fixed-point 8-bit number signed
Includes all wrapper headers at once.
int8x8_t vquantize_signed(const float32x4x2_t &qv, const UniformQuantizationInfo &qi)
Quantize a neon vector holding 8 floating point values.
constexpr size_t offset() const
Return the offset in bytes from the first element to the current position of the iterator.
constexpr int end() const
Return the end of the dimension.
static constexpr size_t num_max_dimensions
Number of dimensions the tensor has.
Iterator updated by execute_window_loop for each window element.
DataType
Available data types.
constexpr int start() const
Return the start of the dimension.
Describe a multidimensional execution window.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
constexpr const Dimension & x() const
Alias to access the first dimension of the window.