15 #include <arm_compute/runtime/NEON/functions/NEAddMulAdd.h>
20 using namespace armcomputetensorutils;
// NOTE(review): fragment of NeonFusedWorkloadValidate() — the function
// signature, the switch on the fused kernel type, and several argument lines
// (including the input-tensor arguments to validate()) are missing from this
// view. Comments below are hedged accordingly; the stray leading numbers are
// extraction residue and are left untouched.
23 const std::vector<std::reference_wrapper<TensorInfo>>& outputInfos,
// Convert each ArmNN TensorInfo into its Arm Compute Library equivalent.
27 std::vector<arm_compute::TensorInfo> actInputInfos;
28 actInputInfos.reserve(inputInfos.size());
29 for (
size_t i = 0u; i < inputInfos.size(); ++i)
31 actInputInfos.emplace_back(BuildArmComputeTensorInfo(inputInfos[i]));
// Same conversion for the output tensor descriptions.
34 std::vector<arm_compute::TensorInfo> actOutputInfos;
35 actOutputInfos.reserve(outputInfos.size());
36 for (
size_t i = 0u; i < outputInfos.size(); ++i)
38 actOutputInfos.emplace_back(BuildArmComputeTensorInfo(outputInfos[i]));
// Presumably built from the fused descriptor's activation parameters —
// the right-hand side of this initializer is not visible here; confirm.
41 const arm_compute::ActivationLayerInfo activationInfo =
// Delegate to ACL's fused Add+Mul+Add NEON kernel validation. The leading
// input arguments (&actInputInfos[0..3]) are outside this view.
47 return arm_compute::NEAddMulAdd::validate(
// With a single output only the final result is requested, so the
// intermediate add output is passed as nullptr; with two outputs, index 0
// is the intermediate add result and index 1 the final output.
52 actOutputInfos.size() == 1 ?
nullptr : &actOutputInfos[0],
53 actOutputInfos.size() == 1 ? &actOutputInfos[0] : &actOutputInfos[1],
// Saturating arithmetic on overflow, matching the workload configuration.
54 arm_compute::ConvertPolicy::SATURATE,
// Presumably the error raised when the descriptor's fused kernel type is
// not AddMulAdd — TODO confirm against the missing switch statement.
58 "NeonFusedWorkloadValidate: no valid kernel type"};
// NOTE(review): fragment of the NeonFusedWorkload constructor — the
// constructor signature, the headers of the range-for loops that populate
// `inputs`/`outputs`, and the switch on the fused kernel type are missing
// from this view. Comments are hedged accordingly; stray leading numbers
// are extraction residue and are left untouched.
// Presumably arguments to a validation/logging helper taking the tensor
// counts — the call itself is not visible here; confirm.
67 static_cast<unsigned int>(
info.m_InputTensorInfos.size()),
68 static_cast<unsigned int>(
info.m_OutputTensorInfos.size()));
// Gather raw (non-owning) ACL tensor pointers for every input handle.
70 std::vector<arm_compute::ITensor*> inputs;
71 inputs.reserve(
info.m_InputTensorInfos.size());
74 inputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(input)->GetTensor());
// Same gathering for the output handles.
77 std::vector<arm_compute::ITensor*> outputs;
78 outputs.reserve(
info.m_OutputTensorInfos.size());
81 outputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(output)->GetTensor());
// Presumably built from the fused descriptor's activation parameters —
// the initializer's right-hand side is not visible here; confirm.
84 const arm_compute::ActivationLayerInfo activationInfo =
// Configure ACL's fused Add+Mul+Add NEON function with the gathered
// tensors. The remaining input arguments (inputs[1..3]) are outside
// this view.
91 auto layer = std::make_unique<arm_compute::NEAddMulAdd>();
92 layer->configure(inputs[0],
// Mirrors the validate() logic: one output => the intermediate add result
// is not requested (nullptr); two outputs => [0] intermediate, [1] final.
96 outputs.size() == 1 ?
nullptr : outputs[0],
97 outputs.size() == 1 ? outputs[0] : outputs[1],
98 arm_compute::ConvertPolicy::SATURATE,
// Transfer ownership of the configured ACL function into the member.
100 m_FusedLayer.reset(layer.release());
// Presumably the default branch for an unsupported fused kernel type —
// TODO confirm against the missing switch statement.
104 throw Exception(
"NeonFusedWorkload: no valid kernel type.");