Compute Library
 22.08
CpuGemmDirectConv2d Class Reference

#include <CpuGemmDirectConv2d.h>

Collaboration diagram for CpuGemmDirectConv2d:

Public Member Functions

 CpuGemmDirectConv2d ()
 
 ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE (CpuGemmDirectConv2d)
 
 ~CpuGemmDirectConv2d ()
 
void configure (const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &info)
 Set the input and output tensors. More...
 
void run (ITensorPack &tensors) override
 Run the kernels contained in the function. More...
 
void prepare (ITensorPack &constants) override
 Prepare the function for executing. More...
 
experimental::MemoryRequirements workspace () const override
 Return the memory requirements required by the workspace. More...
 
- Public Member Functions inherited from INEOperator
 INEOperator (IRuntimeContext *ctx=nullptr)
 Constructor. More...
 
 INEOperator (const INEOperator &)=delete
 Prevent instances of this class from being copied (As this class contains pointers) More...
 
 INEOperator (INEOperator &&)=default
 Default move constructor. More...
 
INEOperator & operator= (const INEOperator &)=delete
 Prevent instances of this class from being copied (As this class contains pointers) More...
 
INEOperator & operator= (INEOperator &&)=default
 Default move assignment operator. More...
 
 ~INEOperator ()
 Default destructor. More...
 
- Public Member Functions inherited from IOperator
virtual ~IOperator ()=default
 Destructor. More...
 

Static Public Member Functions

static Status validate (const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &info)
 Static function to check if given info will lead to a valid configuration of CpuGemmDirectConv2d. More...
 

Detailed Description

Definition at line 41 of file CpuGemmDirectConv2d.h.

Constructor & Destructor Documentation

◆ CpuGemmDirectConv2d()

CpuGemmDirectConv2d ( )

Definition at line 97 of file CpuGemmDirectConv2d.cpp.

97 CpuGemmDirectConv2d::CpuGemmDirectConv2d()
98  : _gemm_asm_func(std::make_unique<CpuGemmAssemblyDispatch>()),
99  _activation_func(std::make_unique<CpuActivation>()),
100  _weights_permute_func(std::make_unique<CpuPermute>()),
101  _aux_mem(AuxTensorIdx::Count),
102  _perm_weights(),
103  _run_activation(false),
104  _is_prepared(false)
105 {
106 }

◆ ~CpuGemmDirectConv2d()

~CpuGemmDirectConv2d ( )
default

Member Function Documentation

◆ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE()

ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE ( CpuGemmDirectConv2d  )

◆ configure()

void configure ( const ITensorInfo *  src,
const ITensorInfo *  weights,
const ITensorInfo *  biases,
ITensorInfo *  dst,
const Conv2dInfo &  info 
)

Set the input and output tensors.

Valid data layouts:

  • All

Valid data type configurations:

src0            src1            src2      dst
QASYMM8         QASYMM8         S32       QASYMM8
QASYMM8_SIGNED  QASYMM8_SIGNED  S32       QASYMM8_SIGNED
F16             F16             F16       F16
F32             F32             F32       F32
BFLOAT16        BFLOAT16        BFLOAT16  BFLOAT16

Parameters
    [in]  src      Source tensor info. The 3 lower dimensions represent a single input [width, height, IFM], while every optional dimension from 4 and above represents a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.
    [in]  weights  Weights tensor info. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.
    [in]  biases   Biases tensor info. Shared biases are supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: should match the input data type, except for inputs of QASYMM8/QASYMM8_SIGNED type, where biases should be of S32 type.
    [in]  dst      Destination tensor info. The 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs. Data types supported: same as input.
    [in]  info     Contains padding and stride information described in PadStrideInfo.
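The valid configurations listed above are a closed set, so a caller can pre-check a (src0, src1, src2, dst) tuple against that table before calling configure(). A minimal sketch of such a guard (the `DT` enum and `is_valid_config` helper are hypothetical illustrations, not part of the library):

```cpp
#include <array>

// Hypothetical mirror of the data types accepted by CpuGemmDirectConv2d.
enum class DT { QASYMM8, QASYMM8_SIGNED, S32, F16, F32, BFLOAT16 };

struct Config { DT src0, src1, src2, dst; };

// The five rows of the "Valid data type configurations" table above.
constexpr std::array<Config, 5> kValidConfigs{{
    { DT::QASYMM8,        DT::QASYMM8,        DT::S32,      DT::QASYMM8 },
    { DT::QASYMM8_SIGNED, DT::QASYMM8_SIGNED, DT::S32,      DT::QASYMM8_SIGNED },
    { DT::F16,            DT::F16,            DT::F16,      DT::F16 },
    { DT::F32,            DT::F32,            DT::F32,      DT::F32 },
    { DT::BFLOAT16,       DT::BFLOAT16,       DT::BFLOAT16, DT::BFLOAT16 },
}};

// Returns true when (src0, src1, src2, dst) matches a documented row.
bool is_valid_config(DT src0, DT src1, DT src2, DT dst)
{
    for(const Config &c : kValidConfigs)
    {
        if(c.src0 == src0 && c.src1 == src1 && c.src2 == src2 && c.dst == dst)
        {
            return true;
        }
    }
    return false;
}
```

For full checking, including shapes and layout, use the static validate() member below rather than a table lookup like this.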

Definition at line 110 of file CpuGemmDirectConv2d.cpp.

References Conv2dInfo::act_info, ARM_COMPUTE_ERROR_ON_NULLPTR, ARM_COMPUTE_ERROR_THROW_ON, ARM_COMPUTE_LOG_PARAMS, ITensorInfo::data_type(), ActivationLayerInfo::enabled(), arm_compute::is_data_type_quantized(), arm_compute::offset_int_vec(), AsmGemmInfo::output_stage, arm_compute::experimental::Prepare, ITensorInfo::total_size(), arm_compute::UNSPECIFIED, CpuGemmDirectConv2d::validate(), WeightsInfo::weight_format(), and Conv2dInfo::weights_info.

111 {
112  ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
113  ARM_COMPUTE_ERROR_THROW_ON(CpuGemmDirectConv2d::validate(src,
114  weights,
115  biases != nullptr ? biases : nullptr,
116  dst,
117  info));
118  ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, info);
119 
120  _run_activation = info.act_info.enabled() && !_gemm_asm_func->is_activation_supported(info.act_info);
121  _is_prepared = false;
122 
123  _weights_permute_func->configure(weights, &_perm_weights, PermutationVector{ 3, 0, 1, 2 });
124 
125  // Configure assembly dispatch
126  cpu::AsmGemmInfo asm_info = init_assembly_metadata(info, false);
127  if(is_data_type_quantized(src->data_type()))
128  {
129  asm_info.output_stage = calculate_output_stage_metadata(src, weights, dst, info.act_info);
130  }
131  _gemm_asm_func->configure(src, &_perm_weights, biases, dst, asm_info);
132 
133  // Configure activation
134  if(_run_activation)
135  {
136  _activation_func->configure(dst, nullptr, info.act_info);
137  }
138 
139  // Add auxiliary memory requirements of the assembly dispatch
140  auto asm_mem_req = _gemm_asm_func->workspace();
141  _aux_mem[AsmGemmWorkspace] = asm_mem_req[AsmGemmWorkspace];
142  _aux_mem[Pretranspose] = asm_mem_req[Pretranspose];
143 
144  if(_aux_mem[Pretranspose].size > 0)
145  {
146  // Release permuted weights at the end of prepare as they are further transposed by the assembly dispatch
147  _aux_mem[PermutedWeights] = MemoryInfo(offset_int_vec(PermutedWeights), MemoryLifetime::Prepare, weights->total_size());
148  }
149  else
150  {
151  // We must permute weights if they are WeightFormat::UNSPECIFIED
152  if(info.weights_info.weight_format() == WeightFormat::UNSPECIFIED)
153  _aux_mem[PermutedWeights] = MemoryInfo(offset_int_vec(PermutedWeights), MemoryLifetime::Persistent, weights->total_size());
154  }
155 }

◆ prepare()

void prepare ( ITensorPack &  constants)
override virtual

Prepare the function for executing.

Any one off pre-processing step required by the function is handled here

Parameters
    [in]  constants  Vector that contains the constant tensors.
Note
Prepare stage might not need all the function's buffers' backing memory to be available in order to execute

Reimplemented from INEOperator.

Definition at line 206 of file CpuGemmDirectConv2d.cpp.

References arm_compute::ACL_DST, arm_compute::ACL_SRC, arm_compute::ACL_SRC_1, ITensorPack::add_const_tensor(), ARM_COMPUTE_ERROR_ON_NULLPTR, CpuAuxTensorHandler::get(), ITensorPack::get_const_tensor(), ITensorPack::get_tensor(), arm_compute::offset_int_vec(), and arm_compute::utils::cast::polymorphic_cast().

Referenced by CpuGemmDirectConv2d::run().

207 {
208  if(!_is_prepared)
209  {
210  // If we are using fixed-format kernel the weights are already reshaped
211  if(_gemm_asm_func && _gemm_asm_func->isVarWeightsKernel())
212  {
213  _gemm_asm_func->prepare(tensors);
214  _is_prepared = true;
215  return;
216  }
217  const ITensor *weights = tensors.get_const_tensor(ACL_SRC_1);
218  ITensor *weights_aux = utils::cast::polymorphic_cast<ITensor *>(tensors.get_tensor(offset_int_vec(PermutedWeights)));
219  ARM_COMPUTE_ERROR_ON_NULLPTR(weights, weights_aux);
220 
221  CpuAuxTensorHandler permuted_weights(_perm_weights, *weights_aux);
222  ITensorPack permute_tensors{ { ACL_SRC, weights }, { ACL_DST, permuted_weights.get() } };
223  _weights_permute_func->run(permute_tensors);
224 
225  tensors.add_const_tensor(ACL_SRC_1, permuted_weights.get());
226  // Call prepare of assembly dispatch
227  _gemm_asm_func->prepare(tensors);
228 
229  _is_prepared = true;
230  }
231 }

◆ run()

void run ( ITensorPack &  tensors)
override virtual

Run the kernels contained in the function.

Parameters
    [in]  tensors  Vector that contains the tensors to operate on.

Reimplemented from INEOperator.

Definition at line 193 of file CpuGemmDirectConv2d.cpp.

References arm_compute::ACL_DST, arm_compute::ACL_SRC, ITensorPack::get_tensor(), arm_compute::test::validation::pack, and CpuGemmDirectConv2d::prepare().

194 {
195  prepare(tensors);
196 
197  _gemm_asm_func->run(tensors);
198  if(_run_activation)
199  {
200  ITensor *io = tensors.get_tensor(ACL_DST);
201  ITensorPack pack{ { ACL_SRC, io }, { ACL_DST, io } };
202  _activation_func->run(pack);
203  }
204 }
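As the listing shows, run() first calls prepare(), then the assembly GEMM, and only applies the standalone activation, in place on the destination tensor, when `_run_activation` is set (i.e. when the assembly path could not fuse the activation). A reduced model of that tail step (`relu_inplace` and `run_flow` are hypothetical stand-ins; ReLU is chosen purely for illustration):

```cpp
#include <vector>

// Stand-in for the separate CpuActivation pass that runs only when the
// assembly kernel could not fuse the activation itself.
void relu_inplace(std::vector<float> &dst)
{
    for(float &v : dst)
    {
        if(v < 0.0f)
        {
            v = 0.0f;
        }
    }
}

// Sketch of the run() control flow: dst already holds the GEMM output,
// and the activation reads and writes the same buffer (src == dst).
void run_flow(std::vector<float> &dst, bool run_activation)
{
    if(run_activation)
    {
        relu_inplace(dst);
    }
}
```

Using the destination tensor as both ACL_SRC and ACL_DST of the activation, as the listing does, avoids allocating an extra intermediate buffer.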

◆ validate()

Status validate ( const ITensorInfo *  src,
const ITensorInfo *  weights,
const ITensorInfo *  biases,
const ITensorInfo *  dst,
const Conv2dInfo &  info 
)
static

Static function to check if given info will lead to a valid configuration of CpuGemmDirectConv2d.

Similar to CpuGemmDirectConv2d::configure()

Returns
a status

Definition at line 156 of file CpuGemmDirectConv2d.cpp.

References ARM_COMPUTE_RETURN_ERROR_ON, ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN, ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT, ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES, ARM_COMPUTE_RETURN_ERROR_ON_MSG, ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR, ARM_COMPUTE_RETURN_ON_ERROR, arm_compute::BFLOAT16, ITensorInfo::data_layout(), ITensorInfo::data_type(), Conv2dInfo::dilation, ITensorInfo::dimension(), arm_compute::F16, arm_compute::F32, arm_compute::is_data_type_quantized_asymmetric(), arm_compute::NHWC, ITensorInfo::num_dimensions(), Conv2dInfo::num_groups, arm_compute::QASYMM8, arm_compute::QASYMM8_SIGNED, arm_compute::QSYMM8_PER_CHANNEL, arm_compute::S32, ITensorInfo::tensor_shape(), arm_compute::utils::cast::U, and CpuGemmAssemblyDispatch::validate().

Referenced by CpuGemmDirectConv2d::configure(), CpuConv2d::get_convolution_method(), and CpuConv2d::validate().

157 {
158  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
159  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::BFLOAT16, DataType::F16, DataType::F32);
160  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::BFLOAT16, DataType::F16, DataType::F32);
161  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights, dst);
162  ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.num_groups > 1, "Grouping (num_groups != 1) is not supported on Neon");
163  ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_layout() != DataLayout::NHWC, "Data layout supported is NHWC");
164  const DataType data_type = src->data_type();
165  const TensorShape i_shape = src->tensor_shape();
166  const TensorShape w_shape = weights->tensor_shape();
167  ARM_COMPUTE_RETURN_ERROR_ON(w_shape[0] != i_shape[0]);
168  ARM_COMPUTE_RETURN_ERROR_ON(info.dilation != Size2D(1U, 1U));
169  ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4);
170  // Validate biases
171  if(biases != nullptr)
172  {
173  if(is_data_type_quantized_asymmetric(data_type))
174  {
175  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
176  }
177  else if(data_type == DataType::BFLOAT16)
178  {
179  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32);
180  }
181  else
182  {
183  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, biases);
184  }
185  ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(3));
186  ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
187  }
188 
189  cpu::AsmGemmInfo asm_info = init_assembly_metadata(info, false);
190  ARM_COMPUTE_RETURN_ON_ERROR(CpuGemmAssemblyDispatch::validate(src, weights, biases, dst, asm_info));
191  return Status{};
192 }
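The ARM_COMPUTE_RETURN_* macros give validate() its early-return shape: the first failed condition is converted into an error Status and returned immediately. A simplified self-contained model of that pattern (this `Status` struct and `RETURN_ERROR_ON_MSG` macro are reduced stand-ins for the library's definitions):

```cpp
#include <string>

// Reduced stand-in for arm_compute::Status: an error flag plus message.
struct Status
{
    bool        ok{ true };
    std::string msg;
};

// Simplified model of ARM_COMPUTE_RETURN_ERROR_ON_MSG: early-return on failure.
#define RETURN_ERROR_ON_MSG(cond, message) \
    do { if(cond) { return Status{ false, message }; } } while(false)

// Mirrors the first two checks of validate(): grouping and data layout.
Status validate_model(int num_groups, bool is_nhwc)
{
    RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported on Neon");
    RETURN_ERROR_ON_MSG(!is_nhwc, "Data layout supported is NHWC");
    return Status{}; // all checks passed
}
```

Because each macro returns on failure, the checks read top-to-bottom as a list of preconditions, and the first violated one determines the reported error.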

◆ workspace()

experimental::MemoryRequirements workspace ( ) const
override virtual

Return the memory requirements required by the workspace.

Reimplemented from INEOperator.

Definition at line 233 of file CpuGemmDirectConv2d.cpp.

234 {
235  return _aux_mem;
236 }
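workspace() simply exposes the _aux_mem table filled in by configure(): one slot per AuxTensorIdx, each with a size and a lifetime — Prepare when the assembly dispatch pretransposes the weights again (so the permuted copy can be released after prepare()), Persistent otherwise. A toy model of that decision (the enums, `MemoryInfo` struct, and `plan_aux_mem` are illustrative, not the library's types):

```cpp
#include <cstddef>
#include <vector>

// Illustrative lifetimes, mirroring MemoryLifetime in spirit.
enum Lifetime { Temporary, Prepare, Persistent };

struct MemoryInfo
{
    Lifetime lifetime{ Temporary };
    size_t   size{ 0 };
};

// Illustrative slot indices; Count sizes the table, as in the constructor.
enum AuxTensorIdx { AsmGemmWorkspace, PermutedWeights, Count };

// Mirrors the tail of configure(): the permuted-weights buffer only needs to
// outlive prepare() when the assembly dispatch keeps its own pretransposed copy.
std::vector<MemoryInfo> plan_aux_mem(size_t weights_size, bool asm_pretransposes)
{
    std::vector<MemoryInfo> aux(Count);
    aux[PermutedWeights] = MemoryInfo{ asm_pretransposes ? Prepare : Persistent, weights_size };
    return aux;
}
```

A memory manager consuming this table can then release Prepare-lifetime buffers right after the prepare() stage while keeping Persistent ones alive across every run().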

The documentation for this class was generated from the following files:

CpuGemmDirectConv2d.h
CpuGemmDirectConv2d.cpp