ComputeLibrary/v21.02/_cpu_elementwise_kernel_8cpp_source.xhtml

 /*
  * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "src/core/cpu/kernels/CpuElementwiseKernel.h"

 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/IAccessWindow.h"
 #include "src/core/CPP/Validate.h"
 #include "src/core/common/Registrars.h"
 #include "src/core/cpu/kernels/elementwise/neon/elementwise_list.h"
 #include "src/core/cpu/kernels/elementwise/neon/elementwise_quantized_list.h"
 #include "src/core/cpu/kernels/elementwise/sve/elementwise_list.h"
 #include "src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"

 #include <arm_neon.h>

 namespace arm_compute
 {
 namespace cpu
 {
 namespace kernels
 {
 namespace
 {
 using ElementwiseSelector = std::add_pointer<bool(DataType)>::type;
 using UKernelType         = CpuElementwiseKernel::ElementwiseFunction;
 struct ElementwiseKernel
 {
     const char               *name;
     const ElementwiseSelector is_selected;
     UKernelType              *ukernel;
 };

 /** Selector that accepts exactly one data type.
  *
  * @tparam dt Data type accepted by this selector.
  *
  * @param[in] data_type Data type being queried.
  *
  * @return True if @p data_type is equal to @p dt, false otherwise.
  */
 template <DataType dt>
 inline bool is_selected(DataType data_type)
 {
     return data_type == dt;
 }

 /** Build a lookup-table entry for a micro-kernel.
  *
  * The entry name has the form "op_<in>_<in>_<out>", where <in> and <out> are the
  * strings returned by string_from_data_type() (the input type appears twice,
  * presumably once per input operand).
  *
  * @tparam input_data_type  Data type the entry is selected for.
  * @tparam output_data_type Data type used for the output part of the name. Defaults to @p input_data_type.
  *
  * @param[in] ukernel Function implementing the operation for @p input_data_type.
  *
  * @return Entry whose selector matches @p input_data_type and whose function is @p ukernel.
  */
 template <DataType input_data_type, DataType output_data_type = input_data_type>
 static ElementwiseKernel generate_kernel(UKernelType *ukernel)
 {
     // The returned entry only stores a raw pointer to the name, so the string must
     // outlive this call. Building it in a local std::string (as done previously) left
     // the pointer dangling as soon as the function returned. The name depends only on
     // the template arguments, so one immutable static string per instantiation suffices.
     static const std::string kernel_name = std::string("op_")
                                            + string_from_data_type(input_data_type) + "_"
                                            + string_from_data_type(input_data_type) + "_"
                                            + string_from_data_type(output_data_type);

     return { kernel_name.c_str(), is_selected<input_data_type>, ukernel };
 }

 template <ArithmeticOperation op>
 std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)>
 configure_arithm_func(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
 {
     ARM_COMPUTE_UNUSED(input2, output);
     static ElementwiseKernel kernels[] =
     {
 #if defined(__ARM_FEATURE_SVE)
         generate_kernel<DataType::F32>(REGISTER_FP32_SVE((arm_compute::cpu::sve::elementwise_arithmetic_op<op, float32_t>))),
         generate_kernel<DataType::S32>(REGISTER_INTEGER_SVE((arm_compute::cpu::sve::elementwise_arithmetic_op<op, int32_t>))),
 #else  /* defined(__ARM_FEATURE_SVE) */
         generate_kernel<DataType::F32>(REGISTER_FP32_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float, 4>>))),
         generate_kernel<DataType::S32>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int32_t, 4>>))),
 #endif /* defined(__ARM_FEATURE_SVE) */
 #if defined(__ARM_FEATURE_SVE2)
         generate_kernel<DataType::QASYMM8>(REGISTER_QASYMM8_SVE((arm_compute::cpu::sve::elementwise_arithmetic_quantized_op<op, uint8_t>))),
         generate_kernel<DataType::QASYMM8_SIGNED>(REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::sve::elementwise_arithmetic_quantized_op<op, int8_t>))),
 #else  /* defined(__ARM_FEATURE_SVE2) */
         generate_kernel<DataType::QASYMM8>(REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_arithm_op_quantized<op>))),
         generate_kernel<DataType::QASYMM8_SIGNED>(REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_arithm_op_quantized_signed<op>))),
 #endif /* defined(__ARM_FEATURE_SVE2) */
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 #if defined(__ARM_FEATURE_SVE)
         generate_kernel<DataType::F16>(REGISTER_FP16_SVE((arm_compute::cpu::sve::elementwise_arithmetic_op<op, float16_t>))),
 #else  /* defined(__ARM_FEATURE_SVE) */
         generate_kernel<DataType::F16>(REGISTER_FP16_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<float16_t, 8>>))),
 #endif /* defined(__ARM_FEATURE_SVE) */
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
         generate_kernel<DataType::S16>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_arithm_op<op, typename wrapper::traits::neon_vector<int16_t, 8>>))),
     };

     for(const auto &uk : kernels)
     {
         if(uk.is_selected(input1->data_type()))
         {
             return uk.ukernel;
         }
     }

     return nullptr;
 }

 template <ComparisonOperation op>
 std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)>
 configure_comp_func(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
 {
     ARM_COMPUTE_UNUSED(input2, output);
     static ElementwiseKernel kernels[] =
     {
 #if defined(__ARM_FEATURE_SVE)
         generate_kernel<DataType::U8, DataType::U8>(REGISTER_INTEGER_SVE((arm_compute::cpu::sve::elementwise_comparison_op<op, uint8_t>))),
         generate_kernel<DataType::F32, DataType::U8>(REGISTER_FP32_SVE((arm_compute::cpu::sve::elementwise_comparison_op<op, float>))),
         generate_kernel<DataType::S16, DataType::U8>(REGISTER_INTEGER_SVE((arm_compute::cpu::sve::elementwise_comparison_op<op, int16_t>))),
         generate_kernel<DataType::S32, DataType::U8>(REGISTER_INTEGER_SVE((arm_compute::cpu::sve::elementwise_comparison_op<op, int32_t>))),
 #else  /* defined(__ARM_FEATURE_SVE) */
         generate_kernel<DataType::U8, DataType::U8>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_8<op, uint8_t, uint8x16_t>))),
         generate_kernel<DataType::F32, DataType::U8>(REGISTER_FP32_NEON((arm_compute::cpu::elementwise_comp_op_32<op, float, float32x4_t>))),
         generate_kernel<DataType::S16, DataType::U8>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_16<op, int16_t, int16x8_t>))),
         generate_kernel<DataType::S32, DataType::U8>(REGISTER_INTEGER_NEON((arm_compute::cpu::elementwise_comp_op_32<op, int32_t, int32x4_t>))),
 #endif /* defined(__ARM_FEATURE_SVE) */
 #if defined(__ARM_FEATURE_SVE2)
         generate_kernel<DataType::QASYMM8_SIGNED, DataType::U8>(REGISTER_QASYMM8_SIGNED_SVE((arm_compute::cpu::sve::elementwise_comparison_quantized_op<op, int8_t>))),
         generate_kernel<DataType::QASYMM8, DataType::U8>(REGISTER_QASYMM8_SVE((arm_compute::cpu::sve::elementwise_comparison_quantized_op<op, uint8_t>))),
 #else  /* defined(__ARM_FEATURE_SVE2) */
         generate_kernel<DataType::QASYMM8_SIGNED, DataType::U8>(REGISTER_QASYMM8_SIGNED_NEON((arm_compute::cpu::elementwise_comp_op_quantized_signed<op>))),
         generate_kernel<DataType::QASYMM8, DataType::U8>(REGISTER_QASYMM8_NEON((arm_compute::cpu::elementwise_comp_op_quantized<op>))),
 #endif /* defined(__ARM_FEATURE_SVE2) */
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 #if defined(__ARM_FEATURE_SVE)
         generate_kernel<DataType::F16, DataType::U8>(REGISTER_FP16_SVE((arm_compute::cpu::sve::elementwise_comparison_op<op, float16_t>))),
 #else  /* defined(__ARM_FEATURE_SVE) */
         generate_kernel<DataType::F16, DataType::U8>(REGISTER_FP16_NEON((arm_compute::cpu::elementwise_comp_op_16<op, float16_t, float16x8_t>))),
 #endif /* defined(__ARM_FEATURE_SVE) */
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
     };

     for(const auto &uk : kernels)
     {
         if(uk.is_selected(input1->data_type()))
         {
             return uk.ukernel;
         }
     }

     return nullptr;
 }
 } // namespace

 /** Checks shared by all element-wise kernels.
  *
  * Verifies F16 support for @p input1, that both inputs have the same data type,
  * that the input shapes can be broadcast together and, when the output is already
  * configured, that its shape equals the broadcast shape.
  *
  * @param[in] input1 First input info.
  * @param[in] input2 Second input info.
  * @param[in] output Output info; its shape is only checked when its total size is non-zero.
  *
  * @return An error status describing the first failed check, otherwise an empty status.
  */
 Status CpuElementwiseKernel::validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);

     // A broadcast shape with zero total size is treated as "shapes cannot be combined".
     const TensorShape broadcasted = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(broadcasted.total_size() == 0, "Inputs are not broadcast compatible");

     // The output shape is only compared once the output has been configured.
     const bool output_is_configured = output.total_size() > 0;
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_is_configured && detail::have_different_dimensions(broadcasted, output.tensor_shape(), 0),
                                     "Wrong shape for output");

     return Status{};
 }

 void CpuElementwiseKernel::configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);

     // Configure kernel window
     const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2);
     const TensorShape &out_shape    = broadcast_pair.first;
     const ValidRegion &valid_region = broadcast_pair.second;

     // Auto initialize output if not initialized
     auto_init_if_empty(*output, out_shape, 1, input1->data_type());

     Window win = calculate_max_window(valid_region);

     ICpuKernel::configure(win);
 }

 /** Execute the kernel on the given window.
  *
  * Fetches the two sources and the destination from @p tensors, asks the derived
  * kernel for the implementation matching their infos and runs it.
  *
  * @param[in] tensors Pack expected to hold ACL_SRC_0, ACL_SRC_1 and ACL_DST.
  * @param[in] window  Region to execute; checked against the configured kernel window.
  * @param[in] info    Thread information (unused).
  */
 void CpuElementwiseKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
 {
     // Only `info` is unused: `window` is forwarded to the selected implementation below.
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);

     const ITensor *src0 = tensors.get_const_tensor(TensorType::ACL_SRC_0);
     const ITensor *src1 = tensors.get_const_tensor(TensorType::ACL_SRC_1);
     ITensor       *dst  = tensors.get_tensor(TensorType::ACL_DST);

     // Report a missing tensor explicitly instead of dereferencing a null pointer below.
     ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst);

     auto function = get_implementation(src0->info(), src1->info(), dst->info());
     ARM_COMPUTE_ERROR_ON(function == nullptr);
     function(src0, src1, dst, window);
 }

 /** Arithmetic operators (max, min, squared_diff, prelu, div, power)
  *
  * Validates the arguments, runs the common configuration and stores the operation.
  *
  * @param[in]     op     Arithmetic operation to execute.
  * @param[in]     input1 First input info.
  * @param[in]     input2 Second input info.
  * @param[in,out] output Output info; may be auto-initialised by the common configuration.
  */
 void CpuArithmeticKernel::configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
 {
     // The pointers are dereferenced for validation on the next line, so they must be
     // checked here rather than only later inside configure_common().
     ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
     configure_common(input1, input2, output);
     _op = op;
 }

 /** Argument checks for arithmetic kernels.
  *
  * @param[in] input1 First input info; must be single-channel QASYMM8, QASYMM8_SIGNED, S16, F16, S32 or F32.
  * @param[in] input2 Second input info.
  * @param[in] output Output info; when configured, its data type must match @p input1.
  *
  * @return An error status for the first failed check, otherwise the result of the common checks.
  */
 Status CpuArithmeticKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);

     // The output data type is only checked once the output has been configured.
     const bool output_is_configured = output.total_size() != 0;
     if(output_is_configured)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &output);
     }

     return validate_arguments_common(input1, input2, output);
 }

 /** Check whether the given infos form a valid arithmetic kernel configuration.
  *
  * @param[in] op     Arithmetic operation (not used by the checks).
  * @param[in] input1 First input info.
  * @param[in] input2 Second input info.
  * @param[in] output Output info.
  *
  * @return An error status if a pointer is null or an argument check fails, otherwise an empty status.
  */
 Status CpuArithmeticKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
 {
     ARM_COMPUTE_UNUSED(op);
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);

     const Status arguments_status = validate_arguments(*input1, *input2, *output);
     ARM_COMPUTE_RETURN_ON_ERROR(arguments_status);

     return Status{};
 }

 /** Select the function implementing the stored arithmetic operation.
  *
  * @param[in] input1 First input info.
  * @param[in] input2 Second input info.
  * @param[in] output Output info.
  *
  * @return The function chosen by configure_arithm_func() for the stored operation;
  *         an unsupported operation is reported through ARM_COMPUTE_ERROR and yields an empty function.
  */
 std::function<CpuElementwiseKernel::ElementwiseFunction>
 CpuArithmeticKernel::get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
 {
     // Stays empty unless one of the supported operations fills it in.
     std::function<CpuElementwiseKernel::ElementwiseFunction> implementation;

     switch(_op)
     {
         case ArithmeticOperation::MAX:
             implementation = configure_arithm_func<ArithmeticOperation::MAX>(input1, input2, output);
             break;
         case ArithmeticOperation::MIN:
             implementation = configure_arithm_func<ArithmeticOperation::MIN>(input1, input2, output);
             break;
         case ArithmeticOperation::SQUARED_DIFF:
             implementation = configure_arithm_func<ArithmeticOperation::SQUARED_DIFF>(input1, input2, output);
             break;
         case ArithmeticOperation::PRELU:
             implementation = configure_arithm_func<ArithmeticOperation::PRELU>(input1, input2, output);
             break;
         case ArithmeticOperation::DIV:
             implementation = configure_arithm_func<ArithmeticOperation::DIV>(input1, input2, output);
             break;
         case ArithmeticOperation::POWER:
             implementation = configure_arithm_func<ArithmeticOperation::POWER>(input1, input2, output);
             break;
         default:
             ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
     }

     return implementation;
 }

 /** The division operator */

 void CpuDivisionKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
 {
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
     configure_common(input1, input2, output);
     _op = ArithmeticOperation::DIV;
 }

 /** Argument checks for the division kernel.
  *
  * @param[in] input1 First input info; must be single-channel S32, F16 or F32.
  * @param[in] input2 Second input info.
  * @param[in] output Output info.
  *
  * @return An error status if @p input1 has an unsupported type, otherwise the result of the arithmetic kernel checks.
  */
 Status CpuDivisionKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
 {
     // Division accepts a narrower set of types than the generic arithmetic kernel.
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::S32, DataType::F16, DataType::F32);

     Status arithmetic_status = CpuArithmeticKernel::validate_arguments(input1, input2, output);
     return arithmetic_status;
 }

 /** Check whether the given infos form a valid division kernel configuration.
  *
  * @param[in] input1 First input info.
  * @param[in] input2 Second input info.
  * @param[in] output Output info.
  *
  * @return An error status if a pointer is null or an argument check fails, otherwise an empty status.
  */
 Status CpuDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);

     const Status arguments_status = validate_arguments(*input1, *input2, *output);
     ARM_COMPUTE_RETURN_ON_ERROR(arguments_status);

     return Status{};
 }

 /** The power operator */
 void CpuPowerKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
 {
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
     configure_common(input1, input2, output);
     _op = ArithmeticOperation::POWER;
 }

 /** Argument checks for the power kernel.
  *
  * @param[in] input1 First input info; must be single-channel F16 or F32.
  * @param[in] input2 Second input info.
  * @param[in] output Output info.
  *
  * @return An error status if @p input1 has an unsupported type, otherwise the result of the arithmetic kernel checks.
  */
 Status CpuPowerKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
 {
     // Power accepts floating-point inputs only.
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::F16, DataType::F32);

     Status arithmetic_status = CpuArithmeticKernel::validate_arguments(input1, input2, output);
     return arithmetic_status;
 }

 /** Check whether the given infos form a valid power kernel configuration.
  *
  * @param[in] input1 First input info.
  * @param[in] input2 Second input info.
  * @param[in] output Output info.
  *
  * @return An error status if a pointer is null or an argument check fails, otherwise an empty status.
  */
 Status CpuPowerKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);

     const Status arguments_status = validate_arguments(*input1, *input2, *output);
     ARM_COMPUTE_RETURN_ON_ERROR(arguments_status);

     return Status{};
 }

 /** Comparison operators (equal, not equal, less than, greater than, less than or equal, greater than or equal)
  *
  * Validates the arguments, runs the common configuration and stores the operation.
  *
  * @param[in]     op     Comparison operation to execute.
  * @param[in]     input1 First input info.
  * @param[in]     input2 Second input info.
  * @param[in,out] output Output info; may be auto-initialised by the common configuration.
  */
 void CpuComparisonKernel::configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
 {
     // The pointers are dereferenced for validation on the next line, so they must be
     // checked here rather than only later inside configure_common().
     ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
     configure_common(input1, input2, output);
     _op = op;
 }

 /** Argument checks for comparison kernels.
  *
  * @param[in] input1 First input info; must be single-channel U8, QASYMM8, QASYMM8_SIGNED, S16, F16, S32 or F32.
  * @param[in] input2 Second input info.
  * @param[in] output Output info; when configured, it must be single-channel U8.
  *
  * @return An error status for the first failed check, otherwise the result of the common checks.
  */
 Status CpuComparisonKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);

     // The output data type is only checked once the output has been configured.
     const bool output_is_configured = output.total_size() != 0;
     if(output_is_configured)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8);
     }

     return validate_arguments_common(input1, input2, output);
 }

 /** Check whether the given infos form a valid comparison kernel configuration.
  *
  * @param[in] op     Comparison operation (not used by the checks).
  * @param[in] input1 First input info.
  * @param[in] input2 Second input info.
  * @param[in] output Output info.
  *
  * @return An error status if a pointer is null or an argument check fails, otherwise an empty status.
  */
 Status CpuComparisonKernel::validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
 {
     ARM_COMPUTE_UNUSED(op);
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);

     const Status arguments_status = validate_arguments(*input1, *input2, *output);
     ARM_COMPUTE_RETURN_ON_ERROR(arguments_status);

     return Status{};
 }

 /** Select the function implementing the stored comparison operation.
  *
  * @param[in] input1 First input info.
  * @param[in] input2 Second input info.
  * @param[in] output Output info.
  *
  * @return The function chosen by configure_comp_func() for the stored operation;
  *         an unsupported operation is reported through ARM_COMPUTE_ERROR and yields an empty function.
  */
 std::function<CpuElementwiseKernel::ElementwiseFunction>
 CpuComparisonKernel::get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
 {
     // Stays empty unless one of the supported operations fills it in.
     std::function<CpuElementwiseKernel::ElementwiseFunction> implementation;

     switch(_op)
     {
         case ComparisonOperation::Equal:
             implementation = configure_comp_func<ComparisonOperation::Equal>(input1, input2, output);
             break;
         case ComparisonOperation::NotEqual:
             implementation = configure_comp_func<ComparisonOperation::NotEqual>(input1, input2, output);
             break;
         case ComparisonOperation::Greater:
             implementation = configure_comp_func<ComparisonOperation::Greater>(input1, input2, output);
             break;
         case ComparisonOperation::GreaterEqual:
             implementation = configure_comp_func<ComparisonOperation::GreaterEqual>(input1, input2, output);
             break;
         case ComparisonOperation::Less:
             implementation = configure_comp_func<ComparisonOperation::Less>(input1, input2, output);
             break;
         case ComparisonOperation::LessEqual:
             implementation = configure_comp_func<ComparisonOperation::LessEqual>(input1, input2, output);
             break;
         default:
             ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
     }

     return implementation;
 }
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
arm_compute::cpu::kernels::CpuPowerKernel::validate
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CpuPowerKernel.
Definition: CpuElementwiseKernel.cpp:298

arm_compute::calculate_max_window
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Definition: WindowHelpers.cpp:28

arm_compute::ArithmeticOperation
ArithmeticOperation
Available element-wise operations.
Definition: Types.h:534

arm_compute::ArithmeticOperation::POWER
x ^ y

WindowHelpers.h

arm_compute::IKernel::window
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28

arm_compute::ACL_DST
Definition: Types.h:46

arm_compute::ArithmeticOperation::DIV
(x / y)

ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED
#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor)
Definition: Validate.h:108

CpuElementwiseKernel.h

REGISTER_FP16_NEON
#define REGISTER_FP16_NEON(func_name)
Definition: Registrars.h:42

arm_compute::cpu::kernels::CpuDivisionKernel::configure
void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
Configure kernel.
Definition: CpuElementwiseKernel.cpp:264

ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352

arm_compute::Format::U8
1 channel, 1 U8 per channel

REGISTER_FP32_NEON
#define REGISTER_FP32_NEON(func_name)
Definition: Registrars.h:52

ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204

elementwise_list.h

arm_compute::Format::F32
1 channel, 1 F32 per channel

REGISTER_FP32_SVE
#define REGISTER_FP32_SVE(func_name)
Definition: Registrars.h:53

arm_compute::TensorShape::broadcast_shape
static TensorShape broadcast_shape(const Shapes &... shapes)
If shapes are broadcast compatible, return the broadcasted shape.
Definition: TensorShape.h:211

REGISTER_QASYMM8_SVE
#define REGISTER_QASYMM8_SVE(func_name)
Definition: Registrars.h:73

arm_compute::cpu::kernels::CpuPowerKernel::configure
void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
Configure kernel.
Definition: CpuElementwiseKernel.cpp:285

ARM_COMPUTE_ERROR_ON
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Definition: Error.h:466

REGISTER_QASYMM8_SIGNED_NEON
#define REGISTER_QASYMM8_SIGNED_NEON(func_name)
Definition: Registrars.h:62

arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:40

ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455

arm_compute::ComparisonOperation::Less
Less comparison ( x < y )

arm_compute::Status
Status class.
Definition: Error.h:52

arm_compute::test::validation::valid_region
const ValidRegion valid_region
Definition: Scale.cpp:221

arm_compute::ITensorInfo::broadcast_shape_and_valid_region
static std::pair< TensorShape, ValidRegion > broadcast_shape_and_valid_region(const Infos &... infos)
If infos are broadcast compatible tensor info's, return the broadcasted shape and the intersection of...
Definition: ITensorInfo.h:271

type
decltype(strategy::transforms) typedef type
Definition: gemm_interleaved.hpp:227

arm_compute::cpu::kernels::CpuComparisonKernel::configure
void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
Configure kernel.
Definition: CpuElementwiseKernel.cpp:306

arm_compute
Copyright (c) 2017-2021 Arm Limited.
Definition: 00_introduction.dox:24

arm_compute::Format::F16
1 channel, 1 F16 per channel

arm_compute::ComparisonOperation::GreaterEqual
Greater equal comparison ( x >= y )

REGISTER_INTEGER_NEON
#define REGISTER_INTEGER_NEON(func_name)
Definition: Registrars.h:92

dt
DataType dt
Definition: CpuActivationKernel.cpp:48

ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163

REGISTER_QASYMM8_SIGNED_SVE
#define REGISTER_QASYMM8_SIGNED_SVE(func_name)
Definition: Registrars.h:63

arm_compute::Format::S32
1 channel, 1 S32 per channel

arm_compute::cpu::kernels::CpuElementwiseKernel::run_op
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
Definition: CpuElementwiseKernel.cpp:197

arm_compute::test::validation::data_type
const DataType data_type
Definition: Im2Col.cpp:150

arm_compute::cpu::elementwise_arithm_op
VectorType::type elementwise_arithm_op(const typename VectorType::type &a, const typename VectorType::type &b)
Definition: elementwise_list.h:160

arm_compute::ITensorPack::get_const_tensor
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:40

arm_compute::string_from_data_type
const std::string & string_from_data_type(DataType dt)
Convert a data type identity into a string.
Definition: Utils.cpp:135

ARM_COMPUTE_UNUSED
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152

REGISTER_QASYMM8_NEON
#define REGISTER_QASYMM8_NEON(func_name)
Definition: Registrars.h:72

elementwise_list.h

arm_compute::DataType::QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned

kernel_name
std::string kernel_name
Definition: CLIm2ColKernel.cpp:52

Registrars.h

REGISTER_INTEGER_SVE
#define REGISTER_INTEGER_SVE(func_name)
Definition: Registrars.h:93

arm_compute::auto_init_if_empty
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
Definition: AutoConfiguration.h:42

arm_compute::detail::have_different_dimensions
bool have_different_dimensions(const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim)
Definition: Validate.h:51

arm_compute::test::validation::dst
auto dst
Definition: DFT.cpp:170

is_selected
const ElementwiseSelector is_selected
Definition: CpuElementwiseKernel.cpp:52

arm_compute::ComparisonOperation
ComparisonOperation
Supported comparison operations.
Definition: Types.h:177

arm_compute::ArithmeticOperation::PRELU
y*x if x < 0, x otherwise

ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941

arm_compute::Format::S16
1 channel, 1 S16 per channel

arm_compute::ACL_SRC_0
Definition: Types.h:43

arm_compute::ACL_SRC_1
Definition: Types.h:44

arm_compute::ComparisonOperation::Equal
Equal comparison ( x == y )

arm_compute::ComparisonOperation::Greater
Greater comparison ( x > y )

elementwise_quantized_list.h

arm_compute::cpu::kernels::CpuComparisonKernel::validate
static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of cpu::kernels::CpuCompari...
Definition: CpuElementwiseKernel.cpp:324

arm_compute::test::validation::info
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)

arm_compute::ITensorPack::get_tensor
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:50

AutoConfiguration.h

arm_compute::ThreadInfo
Information about executing thread and CPU.
Definition: CPPTypes.h:235

arm_compute::ITensorInfo::total_size
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.

REGISTER_FP16_SVE
#define REGISTER_FP16_SVE(func_name)
Definition: Registrars.h:43

arm_compute::cpu::kernels::CpuElementwiseKernel::ElementwiseFunction
void(const ITensor *, const ITensor *, ITensor *, const Window &) ElementwiseFunction
Common signature for all the specialised arithmetic functions.
Definition: CpuElementwiseKernel.h:61

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545

ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792

arm_compute::validate_arguments
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
Definition: NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp:45

arm_compute::ComparisonOperation::LessEqual
Less equal comparison ( x <= y )

Validate.h

ARM_COMPUTE_RETURN_ERROR_ON_MSG
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244

arm_compute::cpu::kernels::CpuArithmeticKernel::validate
static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of cpu::kernels::CpuArithme...
Definition: CpuElementwiseKernel.cpp:231

arm_compute::ITensorPack
Tensor packing service.
Definition: ITensorPack.h:37

ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161

Helpers.h

arm_compute::ArithmeticOperation::SQUARED_DIFF
(x - y)^2

arm_compute::DataType::QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed

ukernel
UKernelType * ukernel
Definition: CpuElementwiseKernel.cpp:53

arm_compute::NonLinearFilterFunction::MAX
Non linear dilate.

arm_compute::ComparisonOperation::NotEqual
NotEqual comparison ( x != y )

elementwise_quantized_list.h

arm_compute::cpu::kernels::CpuDivisionKernel::validate
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CpuDivisionKernel.
Definition: CpuElementwiseKernel.cpp:277

arm_compute::NonLinearFilterFunction::MIN
Non linear erode.

arm_compute::DataType
DataType
Available data types.
Definition: Types.h:77

arm_compute::cpu::kernels::CpuArithmeticKernel::configure
void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
Configure kernel.
Definition: CpuElementwiseKernel.cpp:213

name
const char * name
Definition: CpuElementwiseKernel.cpp:51

arm_compute::Window
Describe a multidimensional execution window.
Definition: Window.h:39

IAccessWindow.h

ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205