Interface for the width concatenate kernel. More...

#include <CpuConcatenateWidthKernel.h>

Collaboration diagram for CpuConcatenateWidthKernel:

Public Member Functions
	CpuConcatenateWidthKernel ()=default

	ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE (CpuConcatenateWidthKernel)

void	configure (const ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst)
	Configure kernel for a given list of arguments. More...

void	run_op (ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
	Execute the kernel on the passed window. More...

const char *	name () const override
	Name of the kernel. More...

Public Member Functions inherited from ICPPKernel
virtual	~ICPPKernel ()=default
	Default destructor. More...

virtual void	run (const Window &window, const ThreadInfo &info)
	Execute the kernel on the passed window. More...

virtual void	run_nd (const Window &window, const ThreadInfo &info, const Window &thread_locator)
	Legacy compatibility layer for implementations which do not support thread_locator. In these cases we simply narrow the interface down to the legacy version. More...

virtual size_t	get_mws (const CPUInfo &platform, size_t thread_count) const
	Return minimum workload size of the relevant kernel. More...

Public Member Functions inherited from IKernel
	IKernel ()
	Constructor. More...

virtual	~IKernel ()=default
	Destructor. More...

virtual bool	is_parallelisable () const
	Indicates whether or not the kernel is parallelisable. More...

virtual BorderSize	border_size () const
	The size of the border for that kernel. More...

const Window &	window () const
	The maximum window the kernel can be executed on. More...

bool	is_window_configured () const
	Function to check if the embedded window of this kernel has been configured. More...

Static Public Member Functions
static Status	validate (const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst)
	Static function to check if given info will lead to a valid configuration. More...

Static Public Member Functions inherited from ICpuKernel< CpuConcatenateWidthKernel >
static const auto *	get_implementation (const SelectorType &selector, KernelSelectionType selection_type=KernelSelectionType::Supported)
	Micro-kernel selector. More...

Additional Inherited Members
Static Public Attributes inherited from ICPPKernel
static constexpr size_t	default_mws = 1

Detailed Description

Interface for the width concatenate kernel.

The source tensor will be concatenated into the destination tensor.

Definition at line 40 of file CpuConcatenateWidthKernel.h.

Constructor & Destructor Documentation

◆ CpuConcatenateWidthKernel()

CpuConcatenateWidthKernel ( )

default

Member Function Documentation

◆ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE()

ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE ( CpuConcatenateWidthKernel )

◆ configure()

void configure	(	const ITensorInfo *	src,
		unsigned int	width_offset,
		ITensorInfo *	dst
	)

Configure kernel for a given list of arguments.

Parameters

[in]	src	Source tensor info. Data types supported: All
[in]	width_offset	The offset on the X axis.
[in,out]	dst	Destination tensor info. Data types supported: Same as `src`.

Definition at line 59 of file CpuConcatenateWidthKernel.cpp.

 {
     // Check the arguments before any state is stored: first the null-pointer check,
     // then the shared validate_arguments() helper (the same helper validate() uses).
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, width_offset, dst));
     // NOTE(review): dst is only used by the checks above; this presumably silences an
     // unused-parameter warning in builds where those checks compile out - confirm.
     ARM_COMPUTE_UNUSED(dst);
  
     // Remember the X offset; run_op() uses it to shift the destination pointer.
     _width_offset = width_offset;
  
     // Configure kernel window
     // The window is computed from the source tensor info only, with default-constructed Steps.
     Window win = calculate_max_window(*src, Steps());
  
     ICpuKernel::configure(win);
 }

References ARM_COMPUTE_ERROR_ON_NULLPTR, ARM_COMPUTE_ERROR_THROW_ON, ARM_COMPUTE_UNUSED, arm_compute::calculate_max_window(), arm_compute::test::validation::dst, arm_compute::test::validation::src, and arm_compute::cpu::kernels::validate_arguments().

◆ name()

const char * name ( ) const

overridevirtual

Name of the kernel.

Returns: Kernel name

Implements ICPPKernel.

Definition at line 175 of file CpuConcatenateWidthKernel.cpp.

 {
     // The kernel name is a fixed string literal matching the class name.
     return "CpuConcatenateWidthKernel";
 }

◆ run_op()

void run_op	(	ITensorPack &	tensors,
		const Window &	window,
		const ThreadInfo &	info
	)

overridevirtual

Execute the kernel on the passed window.

Warning: If is_parallelisable() returns false then the passed window must be equal to window()

Note: The window has to be a region within the window returned by the window() method; The width of the window has to be a multiple of num_elems_processed_per_iteration().

Parameters

[in]	tensors	A tensor pack containing the tensors to operate on (the source is read as ACL_SRC and the destination as ACL_DST).
[in]	window	Region on which to execute the kernel. (Must be a region of the window returned by window())
[in]	info	Info about executing thread and CPU.

Reimplemented from ICPPKernel.

Definition at line 79 of file CpuConcatenateWidthKernel.cpp.

 {
     // Copies the source tensor into the destination tensor, shifted along X by
     // _width_offset elements. When the data type is QASYMM8 / QASYMM8_SIGNED and the
     // source and destination quantization infos differ, values are dequantized with the
     // source info and requantized with the destination info instead of being copied.
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
  
     const auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
     auto       dst = tensors.get_tensor(TensorType::ACL_DST);
  
     // Offset output pointer to the correct position
     // (buffer start + offset of the first element + _width_offset steps of the dimension-0 stride).
     uint8_t *dst_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() +
                        _width_offset * dst->info()->strides_in_bytes()[0];
  
     // The X range is walked manually below using byte offsets: the end bound is scaled
     // by the destination element size and the step is 16 bytes per vector iteration.
     // NOTE(review): window_start_x is not scaled by the element size while window_end_x
     // is - presumably the X start is expected to be 0; confirm against callers.
     const auto    window_start_x = static_cast<int>(window.x().start());
     const auto    window_end_x   = static_cast<int>(window.x().end()) * static_cast<int>(dst->info()->element_size());
     constexpr int window_step_x  = 16;
  
     // Collapse X to a single step so execute_window_loop invokes each lambda once per
     // position of the remaining dimensions; the lambda itself loops over x.
     Window win{window};
     win.set(Window::DimX, Window::Dimension(0, 1, 1));
  
     // Create iterators
     Iterator                       src_it(src, win);
     Iterator                       dst_it(dst, win);
     const DataType                 dt        = src->info()->data_type();
     const UniformQuantizationInfo &src_qinfo = src->info()->quantization_info().uniform();
     const UniformQuantizationInfo &dst_qinfo = dst->info()->quantization_info().uniform();
     // Branch 1: QASYMM8 with differing quantization infos -> requantize while copying.
     if (dt == DataType::QASYMM8 && src_qinfo != dst_qinfo)
     {
         execute_window_loop(
             win,
             [&](const Coordinates &)
             {
                 int x = window_start_x;
                 // Vector path: handles full chunks of window_step_x bytes.
                 for (; x <= (window_end_x - window_step_x); x += window_step_x)
                 {
                     vst1q_u8(dst_ptr + dst_it.offset() + x,
                              vquantize(vdequantize(vld1q_u8(src_it.ptr() + x), src_qinfo), dst_qinfo));
                 }
  
                 // Compute left-over elements
                 // (scalar path for the tail that does not fill a whole vector chunk).
                 for (; x < window_end_x; ++x)
                 {
                     *(dst_ptr + dst_it.offset() + x) =
                         quantize_qasymm8(dequantize_qasymm8(*(src_it.ptr() + x), src_qinfo), dst_qinfo);
                 }
             },
             src_it, dst_it);
     }
     // Branch 2: same as branch 1, using the signed 8-bit load/store and quantize helpers.
     else if (dt == DataType::QASYMM8_SIGNED && src_qinfo != dst_qinfo)
     {
         execute_window_loop(
             win,
             [&](const Coordinates &)
             {
                 int x = window_start_x;
                 // Vector path: the byte pointers are reinterpreted as int8_t for the signed intrinsics.
                 for (; x <= (window_end_x - window_step_x); x += window_step_x)
                 {
                     vst1q_s8(
                         reinterpret_cast<int8_t *>(dst_ptr + dst_it.offset() + x),
                         vquantize_signed(vdequantize(vld1q_s8(reinterpret_cast<int8_t *>(src_it.ptr() + x)), src_qinfo),
                                          dst_qinfo));
                 }
  
                 // Compute left-over elements
                 // (scalar path; the source value is read through the iterator's byte pointer
                 // without the int8_t reinterpretation used in the vector path).
                 for (; x < window_end_x; ++x)
                 {
                     *(dst_ptr + dst_it.offset() + x) =
                         quantize_qasymm8_signed(dequantize_qasymm8_signed(*(src_it.ptr() + x), src_qinfo), dst_qinfo);
                 }
             },
             src_it, dst_it);
     }
     // Branch 3: every other case (including quantized types whose infos match) -> copy as-is.
     else
     {
         execute_window_loop(
             win,
             [&](const Coordinates &)
             {
                 const auto in_ptr  = src_it.ptr();
                 const auto out_ptr = dst_ptr + dst_it.offset();
                 int        x       = window_start_x;
                 // Vector path: copies window_step_x bytes per iteration.
                 for (; x <= (window_end_x - window_step_x); x += window_step_x)
                 {
                     wrapper::vstore(out_ptr + x, wrapper::vloadq(in_ptr + x));
                 }
  
                 // Compute left-over elements
                 // (copied one at a time through the same pointers).
                 for (; x < window_end_x; ++x)
                 {
                     *(out_ptr + x) = *(in_ptr + x);
                 }
             },
             src_it, dst_it);
     }
 }

References arm_compute::ACL_DST, arm_compute::ACL_SRC, ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW, ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL, ARM_COMPUTE_UNUSED, arm_compute::dequantize_qasymm8(), arm_compute::dequantize_qasymm8_signed(), Window::DimX, arm_compute::test::validation::dst, dt, Window::Dimension::end(), arm_compute::execute_window_loop(), ITensorPack::get_const_tensor(), ITensorPack::get_tensor(), arm_compute::test::validation::info, Iterator::offset(), Iterator::ptr(), arm_compute::QASYMM8, arm_compute::QASYMM8_SIGNED, arm_compute::quantize_qasymm8(), arm_compute::quantize_qasymm8_signed(), Window::set(), arm_compute::test::validation::src, Window::Dimension::start(), arm_compute::vdequantize(), arm_compute::wrapper::vloadq(), arm_compute::vquantize(), arm_compute::vquantize_signed(), arm_compute::wrapper::vstore(), IKernel::window(), and Window::x().

◆ validate()

Status validate	(	const ITensorInfo *	src,
		unsigned int	width_offset,
		const ITensorInfo *	dst
	)

static

Static function to check if given info will lead to a valid configuration.

Returns: a status

Definition at line 73 of file CpuConcatenateWidthKernel.cpp.

 {
     // Delegate to the shared validate_arguments() helper (also used by configure()).
     // NOTE(review): ARM_COMPUTE_RETURN_ON_ERROR presumably returns the failing status
     // early - confirm against the macro definition.
     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, width_offset, dst));
     // Otherwise return a default-constructed Status.
     return Status{};
 }

References ARM_COMPUTE_RETURN_ON_ERROR, arm_compute::test::validation::dst, arm_compute::test::validation::src, and arm_compute::cpu::kernels::validate_arguments().

Referenced by CpuConcatenate::validate().

The documentation for this class was generated from the following files:

src/cpu/kernels/CpuConcatenateWidthKernel.h
src/cpu/kernels/CpuConcatenateWidthKernel.cpp

Public Member Functions

Static Public Member Functions

Additional Inherited Members