24.02
|
Data Structures | |
struct | brelu |
Bounded RELU activation object. More... | |
class | compare_dimension |
Function to compare two Dimensions objects and throw an error on mismatch. More... | |
struct | dummy |
Dummy activation object. More... | |
struct | get_tensor_info_t |
Get the info for a tensor, dummy struct. More... | |
struct | get_tensor_info_t< ITensorInfo * > |
Get the info for a tensor. More... | |
struct | Header |
struct | linear |
Linear activation object. More... | |
struct | logistic |
Logistic activation object. More... | |
struct | lubrelu |
Lower-Upper Bounded RELU activation object. More... | |
struct | relu |
RELU activation object. More... | |
struct | square |
Square activation object. More... | |
Enumerations | |
enum | ObjectType : uint32_t { Context = 1, Queue = 2, Tensor = 3, TensorPack = 4, Operator = 5, Invalid = 0x56DEAD78 } |
Object type enumerations. More... | |
Functions | |
template<typename T > | |
bool | have_different_dimensions (const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim) |
template<typename F > | |
arm_compute::Status | for_each_error (F &&) |
template<typename F , typename T , typename... Ts> | |
arm_compute::Status | for_each_error (F &&func, T &&arg, Ts &&...args) |
StatusCode | validate_internal_context (const IContext *ctx) |
Check if an internal context is valid. More... | |
StatusCode | validate_internal_operator (const IOperator *op) |
Check if an internal operator is valid. More... | |
StatusCode | validate_internal_queue (const IQueue *queue) |
Check if an internal queue is valid. More... | |
StatusCode | validate_internal_tensor (const ITensorV2 *tensor) |
Check if an internal tensor is valid. More... | |
StatusCode | validate_internal_pack (const TensorPack *pack) |
Check if an internal TensorPack is valid. More... | |
TensorInfo | convert_to_legacy_tensor_info (const AclTensorDescriptor &desc) |
Convert a descriptor to a legacy format one. More... | |
AclTensorDescriptor | convert_to_descriptor (const TensorInfo &info) |
Convert a legacy tensor meta-data to a descriptor. More... | |
ActivationLayerInfo | convert_to_activation_info (const AclActivationDescriptor &desc) |
Convert an AclActivation descriptor to an internal one. More... | |
float32x4x3_t | load_matrix_row (const float *ptr) |
template<unsigned int stridex> | |
float32x4x2_t | convolve_3x3 (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<> | |
float32x4x2_t | convolve_3x3< 1 > (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<> | |
float32x4x2_t | convolve_3x3< 2 > (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<> | |
float32x4x2_t | convolve_3x3< 3 > (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<unsigned int stridex> | |
void | store_results (float *buffer, const float32x4x2_t &values) |
Stores a float32x4x2_t array into a memory location. More... | |
template<> | |
void | store_results< 1 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | store_results< 2 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | store_results< 3 > (float *buffer, const float32x4x2_t &values) |
template<unsigned int stridex> | |
int | get_input_num_elems_processed (unsigned int num_elems_written_per_iteration) |
template<> | |
int | get_input_num_elems_processed< 1 > (unsigned int num_elems_written_per_iteration) |
template<> | |
int | get_input_num_elems_processed< 2 > (unsigned int num_elems_written_per_iteration) |
template<> | |
int | get_input_num_elems_processed< 3 > (unsigned int num_elems_written_per_iteration) |
float32x4x3_t | load_matrix_row (const float *ptr, int weights_offset=0) |
Loads a 3x3 matrix as a row (float). More... | |
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > | |
int32x4x3_t | load_matrix_row (const T *ptr, int weights_offset=0) |
Loads a 3x3 matrix as a row (uint8_t/int8_t). More... | |
template<unsigned int stridex> | |
void | store_results (int32_t *buffer, const int32x4x2_t &values) |
Stores an int32x4x2_t array into a memory location. More... | |
template<> | |
void | store_results< 1 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | store_results< 2 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | store_results< 3 > (int32_t *buffer, const int32x4x2_t &values) |
template<unsigned int stridex> | |
void | accumulate_results (float *buffer, const float32x4x2_t &values) |
template<> | |
void | accumulate_results< 1 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | accumulate_results< 2 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | accumulate_results< 3 > (float *buffer, const float32x4x2_t &values) |
template<unsigned int stridex> | |
void | accumulate_results (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | accumulate_results< 1 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | accumulate_results< 2 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | accumulate_results< 3 > (int32_t *buffer, const int32x4x2_t &values) |
float32x4_t | single_convolve_3x3_dilation (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, const size_t dilation_x, int input_offset) |
Perform a 3x3 convolution for 4 consecutive elements on float32 when dilation.x() or dilation.y() is not 1. More... | |
float32x4x2_t | convolve_3x3_dilation (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, const size_t dilation_x, unsigned int stridex, int input_offset=0) |
Perform a 3x3 convolution for 8 consecutive elements on float32 when dilation.x() or dilation.y() is not 1. More... | |
template<bool accumulate> | |
void | convolve_3x3 (const float *in_top, const float *in_mid, const float *in_low, float *out_ptr, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, unsigned int stridex, int input_offset=0) |
Perform a convolve3x3 on float32. More... | |
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > | |
int32x4_t | single_convolve_3x3_dilation (const T *in_top, const T *in_mid, const T *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, size_t dilation_x, int32_t input_offset) |
Perform a 3x3 convolution for 4 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1. More... | |
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > | |
int32x4x2_t | convolve_3x3_dilation (const T *in_top, const T *in_mid, const T *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, const size_t dilation_x, unsigned int stridex, int input_offset) |
Perform a 3x3 convolution for 8 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1. More... | |
template<bool accumulate, typename T1 , typename T2 , ARM_COMPUTE_REQUIRES_TA(std::is_same< T1, uint8_t >::value||std::is_same< T1, int8_t >::value) > | |
void | convolve_3x3 (const T1 *in_top, const T1 *in_mid, const T1 *in_low, T2 *out_ptr, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, unsigned int stridex, int32_t input_offset) |
Perform a convolve3x3 on 8-bit elements. More... | |
int | get_input_num_elems_processed (unsigned int num_elems_written_per_iteration, unsigned int stridex) |
Get the number of elements processed on 3x3 convolution. More... | |
|
strong |
|
inline |
void arm_compute::detail::accumulate_results | ( | int32_t * | buffer, |
const int32x4x2_t & | values | ||
) |
|
inline |
|
inline |
Definition at line 154 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 139 of file NEDirectConvolutionDetail.h.
Referenced by convolve_3x3().
|
inline |
Definition at line 161 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 145 of file NEDirectConvolutionDetail.h.
Referenced by convolve_3x3().
|
inline |
Definition at line 167 of file NEDirectConvolutionDetail.h.
ActivationLayerInfo convert_to_activation_info | ( | const AclActivationDescriptor & | desc | ) |
Convert an AclActivation descriptor to an internal one.
[in] | desc | Descriptor to convert |
Definition at line 108 of file LegacySupport.cpp.
References AclActivationDescriptor::a, AclAbs, AclBoundedRelu, AclElu, AclHardSwish, AclIdentity, AclLeakyRelu, AclLinear, AclLogistic, AclLuBoundedRelu, AclRelu, AclSoftRelu, AclSqrt, AclSquare, AclTanh, AclActivationDescriptor::b, and AclActivationDescriptor::type.
Referenced by CpuContext::create_activation(), and ClContext::create_activation().
AclTensorDescriptor convert_to_descriptor | ( | const TensorInfo & | info | ) |
Convert a legacy tensor meta-data to a descriptor.
[in] | info | Legacy tensor meta-data |
Definition at line 100 of file LegacySupport.cpp.
References arm_compute::test::validation::info.
Referenced by ITensorV2::get_descriptor().
TensorInfo convert_to_legacy_tensor_info | ( | const AclTensorDescriptor & | desc | ) |
Convert a descriptor to a legacy format one.
[in] | desc | Descriptor to convert |
Definition at line 92 of file LegacySupport.cpp.
References AclTensorDescriptor::data_type, TensorInfo::init(), AclTensorDescriptor::ndims, and AclTensorDescriptor::shape.
Referenced by ClTensor::ClTensor(), CpuTensor::CpuTensor(), CpuContext::create_activation(), and ClContext::create_activation().
float32x4x2_t arm_compute::detail::convolve_3x3 | ( | const float * | in_top, |
const float * | in_mid, | ||
const float * | in_low, | ||
const float32x4x3_t & | m0, | ||
const float32x4x3_t & | m1, | ||
const float32x4x3_t & | m2 | ||
) |
|
inline |
Perform a convolve3x3 on float32.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[out] | out_ptr | Pointer to the output. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | (Optional) Input quantization offset. |
Definition at line 335 of file NEDirectConvolutionDetail.h.
References arm_compute::test::validation::reference::accumulate(), accumulate_results< 1 >(), accumulate_results< 2 >(), accumulate_results< 3 >(), ARM_COMPUTE_ERROR_ON, ARM_COMPUTE_UNUSED, store_results< 1 >(), store_results< 2 >(), and store_results< 3 >().
void arm_compute::detail::convolve_3x3 | ( | const T1 * | in_top, |
const T1 * | in_mid, | ||
const T1 * | in_low, | ||
T2 * | out_ptr, | ||
const int32x4x3_t & | m0, | ||
const int32x4x3_t & | m1, | ||
const int32x4x3_t & | m2, | ||
unsigned int | stridex, | ||
int32_t | input_offset | ||
) |
Perform a convolve3x3 on 8-bit elements.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[out] | out_ptr | Pointer to the output. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | Input quantization offset. |
Definition at line 539 of file NEDirectConvolutionDetail.h.
References arm_compute::test::validation::reference::accumulate(), accumulate_results< 1 >(), accumulate_results< 2 >(), accumulate_results< 3 >(), ARM_COMPUTE_ERROR_ON, store_results< 1 >(), store_results< 2 >(), store_results< 3 >(), type, arm_compute::wrapper::vaddw(), arm_compute::wrapper::vdup_n(), arm_compute::wrapper::vext_1(), arm_compute::wrapper::vext_2(), arm_compute::wrapper::vgethigh(), arm_compute::wrapper::vgetlane(), arm_compute::wrapper::vgetlow(), arm_compute::wrapper::vload(), arm_compute::wrapper::vmla(), arm_compute::wrapper::vmovl(), arm_compute::wrapper::vreinterpret(), and arm_compute::wrapper::vsetlane().
|
inline |
Definition at line 49 of file NEDirectConvolution3x3.h.
Referenced by convolve_3x3< 2 >(), and convolve_3x3< 3 >().
|
inline |
|
inline |
|
inline |
Perform a 3x3 convolution for 8 consecutive elements on float32 when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | (Optional) Input quantization offset. |
Definition at line 281 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_ERROR_ON, and single_convolve_3x3_dilation().
|
inline |
Perform a 3x3 convolution for 8 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | Input quantization offset. |
Definition at line 494 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_ERROR_ON, single_convolve_3x3_dilation(), arm_compute::wrapper::vgetlane(), and arm_compute::wrapper::vsetlane().
|
inline |
Definition at line 108 of file Validate.h.
Referenced by arm_compute::error_on_mismatching_dimensions(), and for_each_error().
|
inline |
Definition at line 114 of file Validate.h.
References GemmTuner::args, ARM_COMPUTE_RETURN_ON_ERROR, and for_each_error().
int arm_compute::detail::get_input_num_elems_processed | ( | unsigned int | num_elems_written_per_iteration | ) |
|
inline |
Get the number of elements processed on 3x3 convolution.
[in] | num_elems_written_per_iteration | Number of elements written per iteration on 3x3 convolution. |
[in] | stridex | Stride value in elements across x. |
Definition at line 830 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_ERROR.
int arm_compute::detail::get_input_num_elems_processed< 1 > | ( | unsigned int | num_elems_written_per_iteration | ) |
Definition at line 138 of file NEDirectConvolution3x3.h.
int arm_compute::detail::get_input_num_elems_processed< 2 > | ( | unsigned int | num_elems_written_per_iteration | ) |
Definition at line 144 of file NEDirectConvolution3x3.h.
int arm_compute::detail::get_input_num_elems_processed< 3 > | ( | unsigned int | num_elems_written_per_iteration | ) |
Definition at line 150 of file NEDirectConvolution3x3.h.
|
inline |
Definition at line 51 of file Validate.h.
Referenced by GpuKernelComponentGroup::add_component(), arm_compute::test::validation::reference::batch_to_space(), compare_dimension< T >::operator()(), GpuOperatorGroup::try_add_operator(), NELogicalKernel::validate(), and ClComponentElementwiseBinary::validate().
|
inline |
Definition at line 34 of file NEDirectConvolution3x3.h.
|
inline |
Loads a 3x3 matrix as a row (float).
[in] | ptr | Pointer to a float 3x3 matrix. |
[in] | weights_offset | (Optional) Weights quantization offset. |
Definition at line 45 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_UNUSED.
|
inline |
Loads a 3x3 matrix as a row (uint8_t/int8_t).
[in] | ptr | Pointer to a uint8_t/int8_t 3x3 matrix. |
[in] | weights_offset | (Optional) Weights quantization offset. |
Definition at line 60 of file NEDirectConvolutionDetail.h.
|
inline |
Perform a 3x3 convolution for 4 consecutive elements on float32 when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | input_offset | (Optional) Input quantization offset. |
Definition at line 236 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_UNUSED.
Referenced by convolve_3x3_dilation().
|
inline |
Perform a 3x3 convolution for 4 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | input_offset | Input quantization offset. |
Definition at line 428 of file NEDirectConvolutionDetail.h.
References type, arm_compute::wrapper::vaddw(), arm_compute::wrapper::vdup_n(), arm_compute::wrapper::vgetlow(), arm_compute::wrapper::vload(), arm_compute::wrapper::vmla(), arm_compute::wrapper::vmovl(), arm_compute::wrapper::vmul(), and arm_compute::wrapper::vreinterpret().
void store_results | ( | float * | buffer, |
const float32x4x2_t & | values | ||
) |
Stores a float32x4x2_t array into a memory location.
[in] | buffer | Pointer to the memory location where the values will be stored. |
[in] | values | Values that will be stored. |
void arm_compute::detail::store_results | ( | int32_t * | buffer, |
const int32x4x2_t & | values | ||
) |
Stores an int32x4x2_t array into a memory location.
[in] | buffer | Pointer to the memory location where the values will be stored. |
[in] | values | Values that will be stored. |
|
inline |
Definition at line 116 of file NEDirectConvolution3x3.h.
Referenced by convolve_3x3().
|
inline |
Definition at line 110 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 123 of file NEDirectConvolution3x3.h.
Referenced by convolve_3x3().
|
inline |
Definition at line 117 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 129 of file NEDirectConvolution3x3.h.
Referenced by convolve_3x3().
|
inline |
Definition at line 123 of file NEDirectConvolutionDetail.h.
|
inline |
Check if an internal context is valid.
[in] | ctx | Internal context to check |
Definition at line 140 of file IContext.h.
References ARM_COMPUTE_LOG_ERROR_ACL, arm_compute::InvalidArgument, IContext::is_valid(), and arm_compute::Success.
Referenced by AclActivation(), AclCreateQueue(), AclCreateTensor(), AclCreateTensorPack(), AclDestroyContext(), AclGetClContext(), AclGetClDevice(), and AclSetClContext().
|
inline |
Check if an internal operator is valid.
[in] | op | Internal operator to check |
Definition at line 126 of file IOperator.h.
References ARM_COMPUTE_LOG_ERROR_ACL, arm_compute::InvalidArgument, and arm_compute::Success.
Referenced by AclDestroyOperator(), and AclRunOperator().
|
inline |
Check if an internal TensorPack is valid.
[in] | pack | Internal tensor pack to check |
Definition at line 120 of file TensorPack.h.
References ARM_COMPUTE_LOG_ERROR_ACL, arm_compute::InvalidArgument, arm_compute::test::validation::pack, and arm_compute::Success.
Referenced by AclDestroyTensorPack(), AclPackTensor(), AclPackTensors(), and AclRunOperator().
|
inline |
Check if an internal queue is valid.
[in] | queue | Internal queue to check |
Definition at line 89 of file IQueue.h.
References ARM_COMPUTE_LOG_ERROR_ACL, arm_compute::InvalidArgument, IQueue::is_valid(), and arm_compute::Success.
Referenced by AclDestroyQueue(), AclGetClQueue(), AclQueueFinish(), AclRunOperator(), and AclSetClQueue().
|
inline |
Check if an internal tensor is valid.
[in] | tensor | Internal tensor to check |
Definition at line 128 of file ITensorV2.h.
References ARM_COMPUTE_LOG_ERROR_ACL, arm_compute::InvalidArgument, arm_compute::Success, and tensor.
Referenced by AclDestroyTensor(), AclGetClMem(), AclMapTensor(), AclTensorImport(), and AclUnmapTensor().