Compute Library
 21.02
NEQLSTMLayerNormalizationKernel Class Reference

Neon kernel to perform layer normalization. More...

#include <NEQLSTMLayerNormalizationKernel.h>


Public Member Functions

const char * name () const override
 Name of the kernel. More...
 
 NEQLSTMLayerNormalizationKernel ()=default
 Default constructor. More...
 
 NEQLSTMLayerNormalizationKernel (const NEQLSTMLayerNormalizationKernel &)=delete
 Prevent instances of this class from being copied (As this class contains pointers) More...
 
NEQLSTMLayerNormalizationKernel & operator= (const NEQLSTMLayerNormalizationKernel &)=delete
 Prevent instances of this class from being copied (As this class contains pointers) More...
 
 NEQLSTMLayerNormalizationKernel (NEQLSTMLayerNormalizationKernel &&)=default
 Default Move Constructor. More...
 
NEQLSTMLayerNormalizationKernel & operator= (NEQLSTMLayerNormalizationKernel &&)=default
 Default move assignment operator. More...
 
 ~NEQLSTMLayerNormalizationKernel ()=default
 Default destructor. More...
 
void configure (const ITensor *input, ITensor *output, const ITensor *weight, const ITensor *bias)
 Set the input and output tensors. More...
 
void run (const Window &window, const ThreadInfo &info) override
 Execute the kernel on the passed window. More...
 
- Public Member Functions inherited from ICPPKernel
virtual ~ICPPKernel ()=default
 Default destructor. More...
 
virtual void run_nd (const Window &window, const ThreadInfo &info, const Window &thread_locator)
 Legacy compatibility layer for implementations which do not support thread_locator. In these cases we simply narrow the interface down to the legacy version. More...
 
virtual void run_op (ITensorPack &tensors, const Window &window, const ThreadInfo &info)
 Execute the kernel on the passed window. More...
 
- Public Member Functions inherited from IKernel
 IKernel ()
 Constructor. More...
 
virtual ~IKernel ()=default
 Destructor. More...
 
virtual bool is_parallelisable () const
 Indicates whether or not the kernel is parallelisable. More...
 
virtual BorderSize border_size () const
 The size of the border for that kernel. More...
 
const Window & window () const
 The maximum window the kernel can be executed on. More...
 

Static Public Member Functions

static Status validate (const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias)
 Static function to check if given info will lead to a valid configuration of NEQLSTMLayerNormalizationKernel. More...
 

Detailed Description

Neon kernel to perform layer normalization.

Definition at line 35 of file NEQLSTMLayerNormalizationKernel.h.
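As context for the members documented below, the computation this kernel performs can be sketched in plain C++. The following is an illustrative float reference of per-row layer normalization, not the library's QSYMM16 fixed-point implementation; the function name and epsilon default are hypothetical.

```cpp
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// Illustrative float reference of layer normalization over one row:
// y[i] = (x[i] - mean) / sqrt(variance + eps) * weight[i] + bias[i].
// The kernel itself operates on QSYMM16 fixed-point data; this sketch
// only shows the underlying math.
std::vector<float> layer_norm_row(const std::vector<float> &x,
                                  const std::vector<float> &weight,
                                  const std::vector<float> &bias,
                                  float eps = 1e-8f)
{
    assert(x.size() == weight.size() && x.size() == bias.size());
    const size_t n = x.size();

    // Mean over the row.
    float mean = 0.f;
    for(float v : x) mean += v;
    mean /= static_cast<float>(n);

    // Population variance over the row.
    float var = 0.f;
    for(float v : x) var += (v - mean) * (v - mean);
    var /= static_cast<float>(n);

    // Normalize, then apply per-element weight and bias.
    std::vector<float>  y(n);
    const float inv_std = 1.f / std::sqrt(var + eps);
    for(size_t i = 0; i < n; ++i)
        y[i] = (x[i] - mean) * inv_std * weight[i] + bias[i];
    return y;
}
```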

Constructor & Destructor Documentation

◆ NEQLSTMLayerNormalizationKernel() [1/3]

Default constructor.

Referenced by NEQLSTMLayerNormalizationKernel::name().

◆ NEQLSTMLayerNormalizationKernel() [2/3]

Prevent instances of this class from being copied (As this class contains pointers)

◆ NEQLSTMLayerNormalizationKernel() [3/3]

Default Move Constructor.

◆ ~NEQLSTMLayerNormalizationKernel()

Default destructor.

Referenced by NEQLSTMLayerNormalizationKernel::name().

Member Function Documentation

◆ configure()

void configure ( const ITensor * input,
                 ITensor *       output,
                 const ITensor * weight,
                 const ITensor * bias 
               )

Set the input and output tensors.

Parameters
[in]  input   Source tensor. Data types supported: QSYMM16.
[out] output  Destination tensor. Data types supported: Same as input.
[in]  weight  Weight tensor. Data types supported: Same as input.
[in]  bias    Bias tensor. Data types supported: S32.

Definition at line 84 of file NEQLSTMLayerNormalizationKernel.cpp.

References ARM_COMPUTE_ERROR_ON, ARM_COMPUTE_ERROR_ON_NULLPTR, ARM_COMPUTE_ERROR_THROW_ON, ITensor::info(), arm_compute::test::validation::input, arm_compute::QSYMM16, and arm_compute::validate().

Referenced by NEQLSTMLayerNormalizationKernel::name().

85 {
86  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weight, bias, output);
87  ARM_COMPUTE_ERROR_ON(input == output);
88  ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), weight->info(), bias->info()));
89 
90  static const std::map<DataType, ComputeFuncType> fn_map =
91  {
92  { DataType::QSYMM16, std::mem_fn(&NEQLSTMLayerNormalizationKernel::compute_qsymm16) },
93  };
94 
95  _input = input;
96  _output = output;
97  _weight = weight;
98  _bias = bias;
99  _fn = fn_map.at(_input->info()->data_type());
100 
101  auto_init_if_empty(*_output->info(), *_input->info());
102  _output->info()->set_quantization_info(compute_output_qinfo());
103 
104  const UniformQuantizationInfo wq_info = _weight->info()->quantization_info().uniform();
105  const Status s = quantization::calculate_quantized_multiplier(wq_info.scale, &_output_multiplier, &_output_shift);
106  _output_shift *= -1;
107 
108  if(!bool(s))
109  {
110  _output_multiplier = 0;
111  _output_shift = 0;
112  }
113 
114  Window win = configure_window(output);
115  INEKernel::configure(win);
116 }
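The configure() body above converts the weight's uniform quantization scale into a fixed-point multiplier and shift via quantization::calculate_quantized_multiplier. Below is a minimal standalone sketch of that conversion, assuming the common frexp-based decomposition; the library's exact implementation may differ, and the function name here is a stand-in.

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>

// Decompose a real-valued multiplier into a Q0.31 fixed-point
// multiplier and a power-of-two shift, in the spirit of
// quantization::calculate_quantized_multiplier.
bool quantize_multiplier(double multiplier, int32_t *quant_multiplier, int32_t *shift)
{
    if(multiplier == 0.)
    {
        *quant_multiplier = 0;
        *shift            = 0;
        return true;
    }
    // multiplier = q * 2^exponent, with q in [0.5, 1).
    int          exponent = 0;
    const double q        = std::frexp(multiplier, &exponent);
    // Scale q into Q0.31 fixed point.
    int64_t q_fixed = static_cast<int64_t>(std::llround(q * (1ll << 31)));
    if(q_fixed == (1ll << 31)) // rounding pushed q up to 1.0; renormalize
    {
        q_fixed /= 2;
        ++exponent;
    }
    *quant_multiplier = static_cast<int32_t>(q_fixed);
    *shift            = exponent;
    return true;
}
```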

◆ name()

Name of the kernel.

◆ operator=() [1/2]

Prevent instances of this class from being copied (As this class contains pointers)

Referenced by NEQLSTMLayerNormalizationKernel::name().

◆ operator=() [2/2]

Default move assignment operator.

◆ run()

void run ( const Window &     window,
           const ThreadInfo & info 
         ) override

Execute the kernel on the passed window.

Warning
If is_parallelisable() returns false then the passed window must be equal to window()
Note
The window has to be a region within the window returned by the window() method
The width of the window has to be a multiple of num_elems_processed_per_iteration().
Parameters
[in]windowRegion on which to execute the kernel. (Must be a region of the window returned by window())
[in]infoInfo about executing thread and CPU.

Reimplemented from ICPPKernel.

Definition at line 166 of file NEQLSTMLayerNormalizationKernel.cpp.

References ARM_COMPUTE_ERROR_ON, ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW, ARM_COMPUTE_ERROR_ON_MSG, ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL, ARM_COMPUTE_UNUSED, arm_compute::quantization::multiply_by_quantized_multiplier(), arm_compute::multiply_by_quantized_multiplier_2row(), arm_compute::wrapper::vadd(), arm_compute::wrapper::vcombine(), arm_compute::wrapper::vdup_n(), arm_compute::wrapper::vgethigh(), arm_compute::wrapper::vgetlane(), arm_compute::wrapper::vgetlow(), arm_compute::wrapper::vloadq(), arm_compute::wrapper::vmovl(), arm_compute::wrapper::vmovn(), arm_compute::wrapper::vmul(), arm_compute::wrapper::vpaddl(), arm_compute::wrapper::vqmovn(), arm_compute::wrapper::vstore(), arm_compute::wrapper::vsub(), and IKernel::window().

Referenced by NEQLSTMLayerNormalizationKernel::name().

167 {
168  ARM_COMPUTE_UNUSED(window, info);
169  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
170  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
171  ARM_COMPUTE_ERROR_ON_MSG(!_fn, "internal function is not defined for computation");
172 
173  _fn(*this);
174 }
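The window passed to run() must be a region of the kernel's maximum window, and a scheduler typically hands each thread its own slice. As a hedged, standalone illustration of such a split (not the library's scheduler; split_range is a hypothetical helper):

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

// Split a [0, total) element range into at most num_threads contiguous
// sub-ranges, echoing the contract that each thread's window is a
// sub-region of the kernel's maximum window.
std::vector<std::pair<size_t, size_t>> split_range(size_t total, size_t num_threads)
{
    std::vector<std::pair<size_t, size_t>> parts;
    if(num_threads == 0 || total == 0) return parts;
    // Ceiling division so every element is covered.
    const size_t chunk = (total + num_threads - 1) / num_threads;
    for(size_t start = 0; start < total; start += chunk)
        parts.emplace_back(start, std::min(start + chunk, total));
    return parts;
}
```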

◆ validate()

Status validate ( const ITensorInfo * input,
                  const ITensorInfo * output,
                  const ITensorInfo * weight,
                  const ITensorInfo * bias 
                )
static

Static function to check if given info will lead to a valid configuration of NEQLSTMLayerNormalizationKernel.

Parameters
[in] input   Source tensor info. Data types supported: QSYMM16.
[in] output  Destination tensor info. Data types supported: Same as input.
[in] weight  Weight tensor info. Data types supported: Same as input.
[in] bias    Bias tensor info. Data types supported: S32.
Returns
a status

Definition at line 140 of file NEQLSTMLayerNormalizationKernel.cpp.

References ARM_COMPUTE_ERROR_ON_NULLPTR, ARM_COMPUTE_RETURN_ERROR_ON, ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN, ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES, ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES, ARM_COMPUTE_UNUSED, ITensorInfo::num_dimensions(), arm_compute::QSYMM16, arm_compute::S32, ITensorInfo::tensor_shape(), ITensorInfo::total_size(), and Dimensions< T >::x().

Referenced by NEQLSTMLayerNormalizationKernel::name().

141 {
142  ARM_COMPUTE_UNUSED(output, bias, weight, input);
143 
144  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weight, bias, output);
145 
146  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QSYMM16);
147  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weight, 1, DataType::QSYMM16);
148  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
149 
150  ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > max_input_dimension);
151  ARM_COMPUTE_RETURN_ERROR_ON(weight->num_dimensions() > max_weight_dimension);
152  ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > max_bias_dimension);
153 
154  ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().x() != weight->tensor_shape().x());
155  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(weight, bias);
156 
157  if(output->total_size() != 0)
158  {
159  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
160  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
161  }
162 
163  return Status{};
164 }
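The constraints validate() documents can be mirrored in a small standalone checker. The enum, struct, and function below are hypothetical stand-ins, not Arm Compute Library types; they only encode the documented rules (QSYMM16 input/weight, S32 bias, matching inner dimensions, and matching weight/bias shapes):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Stand-in data type tag, not arm_compute::DataType.
enum class DT { QSYMM16, S32 };

// Stand-in tensor metadata; shape[0] is the innermost (x) dimension.
struct Info
{
    DT                  dtype;
    std::vector<size_t> shape;
};

// Mirror of the documented validate() rules.
bool valid_config(const Info &input, const Info &weight, const Info &bias)
{
    if(input.dtype != DT::QSYMM16 || weight.dtype != DT::QSYMM16) return false;
    if(bias.dtype != DT::S32) return false;
    if(input.shape.empty() || weight.shape.empty() || bias.shape.empty()) return false;
    if(input.shape[0] != weight.shape[0]) return false; // x dimensions must match
    if(weight.shape != bias.shape) return false;        // weight and bias shapes must match
    return true;
}
```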

The documentation for this class was generated from the following files:

NEQLSTMLayerNormalizationKernel.h
NEQLSTMLayerNormalizationKernel.cpp