15 #include <arm_compute/runtime/NEON/functions/NEAddMulAdd.h>
20 using namespace armcomputetensorutils;
23 const std::vector<std::reference_wrapper<TensorInfo>>& outputInfos,
27 std::vector<arm_compute::TensorInfo> actInputInfos;
28 actInputInfos.reserve(inputInfos.size());
29 for (
size_t i = 0u; i < inputInfos.size(); ++i)
31 actInputInfos.emplace_back(BuildArmComputeTensorInfo(inputInfos[i]));
34 std::vector<arm_compute::TensorInfo> actOutputInfos;
35 actOutputInfos.reserve(outputInfos.size());
36 for (
size_t i = 0u; i < outputInfos.size(); ++i)
38 actOutputInfos.emplace_back(BuildArmComputeTensorInfo(outputInfos[i]));
41 const arm_compute::ActivationLayerInfo activationInfo =
47 return arm_compute::NEAddMulAdd::validate(
52 actOutputInfos.size() == 1 ?
nullptr : &actOutputInfos[0],
53 actOutputInfos.size() == 1 ? &actOutputInfos[0] : &actOutputInfos[1],
54 arm_compute::ConvertPolicy::SATURATE,
58 "NeonFusedWorkloadValidate: no valid kernel type"};
67 static_cast<unsigned int>(
info.m_InputTensorInfos.size()),
68 static_cast<unsigned int>(
info.m_OutputTensorInfos.size()));
70 std::vector<arm_compute::ITensor*> inputs;
71 inputs.reserve(
info.m_InputTensorInfos.size());
74 inputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(input)->GetTensor());
77 std::vector<arm_compute::ITensor*> outputs;
78 outputs.reserve(
info.m_OutputTensorInfos.size());
81 outputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(output)->GetTensor());
84 const arm_compute::ActivationLayerInfo activationInfo =
91 auto layer = std::make_unique<arm_compute::NEAddMulAdd>();
92 layer->configure(inputs[0],
96 outputs.size() == 1 ?
nullptr : outputs[0],
97 outputs.size() == 1 ? outputs[0] : outputs[1],
98 arm_compute::ConvertPolicy::SATURATE,
100 m_FusedLayer.reset(layer.release());
104 throw Exception(
"NeonFusedWorkload: no valid kernel type.");
#define ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(name, guid)
arm::pipe::ProfilingGuid GetGuid() const final
Base class for all ArmNN exceptions so that users can filter to just those.
NeonFusedWorkload(const FusedQueueDescriptor &descriptor, const WorkloadInfo &info)
virtual void Execute() const override
Copyright (c) 2021 ARM Limited and Contributors.
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo(const QueueDescriptor &queueDescriptor)
arm_compute::Status NeonFusedWorkloadValidate(const std::vector< std::reference_wrapper< TensorInfo >> &inputInfos, const std::vector< std::reference_wrapper< TensorInfo >> &outputInfos, const FusedDescriptor &fusedDescriptor, const ActivationDescriptor *activationDescriptor)
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)
An ActivationDescriptor for the ActivationLayer.
A FusedDescriptor for the FusedLayer.
FusedKernelType m_FusedKernelType
std::vector< ITensorHandle * > m_Inputs
std::vector< ITensorHandle * > m_Outputs
void ValidateInputsOutputs(const std::string &descName, unsigned int numExpectedIn, unsigned int numExpectedOut) const
LayerDescriptor m_Parameters
Contains information about TensorInfos of a layer.