ArmNN
 25.02
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ClGatherNdWorkload Class Reference

#include <ClGatherNdWorkload.hpp>

Inheritance diagram for ClGatherNdWorkload:
[legend]
Collaboration diagram for ClGatherNdWorkload:
[legend]

Public Member Functions

 ClGatherNdWorkload (const GatherNdQueueDescriptor &descriptor, const WorkloadInfo &info, const arm_compute::CLCompileContext &clCompileContext)
 
virtual void Execute () const override
 
- Public Member Functions inherited from ClBaseWorkload< GatherNdQueueDescriptor >
 ClBaseWorkload (const GatherNdQueueDescriptor &descriptor, const WorkloadInfo &info)
 
void ReplaceInputTensorHandle (ITensorHandle *tensorHandle, unsigned int slot) override
 
void ReplaceOutputTensorHandle (ITensorHandle *tensorHandle, unsigned int slot) override
 
- Public Member Functions inherited from BaseWorkload< QueueDescriptor >
 BaseWorkload (const QueueDescriptor &descriptor, const WorkloadInfo &info)
 
virtual const std::string & GetName () const override
 
void PostAllocationConfigure () override
 
const QueueDescriptor & GetData () const
 
arm::pipe::ProfilingGuid GetGuid () const final
 
virtual bool SupportsTensorHandleReplacement () const override
 
- Public Member Functions inherited from IWorkload
virtual ~IWorkload ()
 
virtual void RegisterDebugCallback (const DebugCallbackFunction &)
 
virtual armnn::Optional< armnn::MemoryRequirements > GetMemoryRequirements ()
 

Additional Inherited Members

- Protected Member Functions inherited from ClBaseWorkload< GatherNdQueueDescriptor >
virtual void Reconfigure ()
 
- Protected Attributes inherited from BaseWorkload< QueueDescriptor >
QueueDescriptor m_Data
 
const arm::pipe::ProfilingGuid m_Guid
 
const std::string m_Name
 

Detailed Description

Definition at line 22 of file ClGatherNdWorkload.hpp.

Constructor & Destructor Documentation

◆ ClGatherNdWorkload()

ClGatherNdWorkload ( const GatherNdQueueDescriptor descriptor,
const WorkloadInfo info,
const arm_compute::CLCompileContext &  clCompileContext 
)

Calculate flattened indices: m_FlattenedIndices = indices * m_FlattenedCoeff. This could be done using MatMul instead of multiplication followed by reduce sum operation, but GeMM does not support s32 at the moment.

Call Gather with adequate shapes

Definition at line 109 of file ClGatherNdWorkload.cpp.

112  : ClBaseWorkload<GatherNdQueueDescriptor>(descriptor, info)
113 {
114  m_Data.ValidateInputsOutputs("ClGatherNdWorkload", 2, 1);
115 
116  TensorInfo paramsInfo = info.m_InputTensorInfos[0];
117  TensorInfo indicesInfo = info.m_InputTensorInfos[1];
118  TensorInfo outputInfo = info.m_OutputTensorInfos[0];
119 
120  arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
121  arm_compute::ICLTensor& indices = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
122  arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
123 
124  // Calculate ND, K, W, C.
125  std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
126 
127  /// Calculate flattened indices: m_FlattenedIndices = indices * m_FlattenedCoeff.
128  /// This could be done using MatMul instead of multiplication followed by reduce sum operation,
129  /// but GeMM does not support s32 at the moment.
130 
131  // Prepare the tensor to store the output of the reduce_sum operation
132  armnn::TensorInfo flattenedIndices_Info = indicesInfo;
133  flattenedIndices_Info.SetShape({ keyIndices["W"] });
134  BuildArmComputeTensor(m_FlattenedIndices, flattenedIndices_Info);
135  armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedIndices);
136 
137  // Reshape indices into { W, ND }
138  armnn::TensorInfo indicesInfoReshape = indicesInfo;
139  indicesInfoReshape.SetShape({ keyIndices["W"], keyIndices["ND"] });
140  BuildArmComputeTensor(m_IndicesReshaped, indicesInfoReshape);
141  armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_IndicesReshaped);
142 
143  // Calculate the m_FlattenedCoeff
144  TensorShape paramsShape = paramsInfo.GetShape();
145  std::vector<int32_t> flattenedCoeff(keyIndices["ND"], 1);
146  for (unsigned int i = 1; i < keyIndices["ND"]; ++i)
147  {
148  flattenedCoeff[i - 1] = static_cast<int32_t>(paramsShape[i]);
149  }
150  for (unsigned int i = keyIndices["ND"] - 1; i > 0; --i)
151  {
152  flattenedCoeff[i - 1] *= flattenedCoeff[i];
153  }
154  armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
155  flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
156  BuildArmComputeTensor(m_FlattenedCoeff, flattenedCoeff_Info);
157  armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedCoeff);
158  CopyArmComputeClTensorData<int32_t>(m_FlattenedCoeff, flattenedCoeff.data());
159 
160  // Prepare the tensor to store the output of the multiplication
161  armnn::TensorInfo outputMul_Info = indicesInfo;
162  outputMul_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
163  BuildArmComputeTensor(m_OutputMul, outputMul_Info);
164  armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputMul);
165 
166  // Reshape indices to the mul layer input shape
167  m_ReshapeIndicesLayer.configure(&indices, &m_IndicesReshaped);
168 
169  // Multiply
170  m_MulLayer.configure(clCompileContext,
171  &m_IndicesReshaped,
172  &m_FlattenedCoeff,
173  &m_OutputMul,
174  1.0f,
175  arm_compute::ConvertPolicy::WRAP,
176  arm_compute::RoundingPolicy::TO_ZERO,
177  arm_compute::ActivationLayerInfo());
178 
179  // Reduce Sum
180  const std::vector<unsigned int> armnnReduceAxes(1, 1);
181  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(m_OutputMul.info()->num_dimensions(),
182  outputMul_Info.GetNumDimensions(),
183  armnnReduceAxes);
184  m_ReduceSumLayer.configure(clCompileContext,
185  &m_OutputMul,
186  &m_FlattenedIndices,
187  static_cast<unsigned int>(coords[0]),
188  arm_compute::ReductionOperation::SUM,
189  false);
190 
191  /// Call Gather with adequate shapes
192  // Reshape params into { K, C }
193  armnn::TensorInfo paramsInfoReshape = paramsInfo;
194  paramsInfoReshape.SetShape({ keyIndices["K"], keyIndices["C"] });
195  BuildArmComputeTensor(m_InputGather, paramsInfoReshape);
196  armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_InputGather);
197 
198  // Reshape input to the gather params input shape
199  m_ReshapeInputLayer.configure(&input, &m_InputGather);
200 
201  // Reshape output to have the shape given by gather { W, C }
202  // (the original outputInfo has the shape given by gatherNd)
203  armnn::TensorInfo outputGather_Info = outputInfo;
204  outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
205  BuildArmComputeTensor(m_OutputGather, outputGather_Info);
206  armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputGather);
207  {
208  ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID("ClGatherNdWorkload_configure");
209  auto aclAxis = ComputeAclAxis(0, paramsInfoReshape);
210  m_GatherLayer.configure(clCompileContext, &m_InputGather, &m_FlattenedIndices, &m_OutputGather, aclAxis);
211  }
212 
213  // Reshape output to the original output shape
214  m_ReshapeOutputLayer.configure(clCompileContext, &m_OutputGather, &output);
215 };
#define ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID(label)
Creates a profiling event that uses GetGuid() and GetName() from the calling class.
QueueDescriptor m_Data
Definition: Workload.hpp:74
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:197
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:195
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank,...
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1)
Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
std::vector< ITensorHandle * > m_Inputs
std::vector< ITensorHandle * > m_Outputs
void ValidateInputsOutputs(const std::string &descName, unsigned int numExpectedIn, unsigned int numExpectedOut) const

References armnn::CalculateGatherNdKeyIndices(), armnn::info, BaseWorkload< QueueDescriptor >::m_Data, QueueDescriptor::m_Inputs, QueueDescriptor::m_Outputs, TensorInfo::SetShape(), and QueueDescriptor::ValidateInputsOutputs().

Member Function Documentation

◆ Execute()

void Execute ( ) const
overridevirtual

Implements IWorkload.

Definition at line 217 of file ClGatherNdWorkload.cpp.

218 {
219  ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID("ClGatherNdWorkload_Execute");
220  RunClFunction(m_ReshapeInputLayer, CHECK_LOCATION());
221  RunClFunction(m_ReshapeIndicesLayer, CHECK_LOCATION());
222  RunClFunction(m_MulLayer, CHECK_LOCATION());
223  RunClFunction(m_ReduceSumLayer, CHECK_LOCATION());
224  RunClFunction(m_GatherLayer, CHECK_LOCATION());
225  RunClFunction(m_ReshapeOutputLayer, CHECK_LOCATION());
226 }
#define CHECK_LOCATION()
Definition: Exceptions.hpp:203
void RunClFunction(arm_compute::IFunction &function, const CheckLocation &location)

References ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID, CHECK_LOCATION, and armnn::RunClFunction().


The documentation for this class was generated from the following files: