21.05
|
Data Structures | |
struct | brelu |
Bounded RELU activation object. More... | |
class | compare_dimension |
Function to compare two Dimensions objects and throw an error on mismatch. More... | |
struct | dummy |
Dummy activation object. More... | |
struct | get_tensor_info_t |
Get the info for a tensor, dummy struct. More... | |
struct | get_tensor_info_t< ITensorInfo * > |
Get the info for a tensor. More... | |
struct | Header |
struct | linear |
Linear activation object. More... | |
struct | logistic |
Logistic activation object. More... | |
struct | lubrelu |
Lower-Upper Bounded RELU activation object. More... | |
struct | relu |
RELU activation object. More... | |
struct | square |
Square activation object. More... | |
Enumerations | |
enum | ObjectType : uint32_t { Context = 1, Queue = 2, Tensor = 3, TensorPack = 4, Operator = 5, Invalid = 0x56DEAD78 } |
Object type enumerations. More... |
Functions | |
template<typename T > | |
bool | have_different_dimensions (const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim) |
template<typename F > | |
arm_compute::Status | for_each_error (F &&) |
template<typename F , typename T , typename... Ts> | |
arm_compute::Status | for_each_error (F &&func, T &&arg, Ts &&... args) |
StatusCode | validate_internal_context (const IContext *ctx) |
Check if an internal context is valid. More... | |
StatusCode | validate_internal_queue (const IQueue *queue) |
Check if an internal queue is valid. More... | |
StatusCode | validate_internal_tensor (const ITensorV2 *tensor) |
Check if an internal tensor is valid. More... | |
StatusCode | validate_internal_pack (const TensorPack *pack) |
Check if an internal TensorPack is valid. More... | |
TensorInfo | convert_to_legacy_tensor_info (const AclTensorDescriptor &desc) |
Convert a descriptor to a legacy format one. More... | |
AclTensorDescriptor | convert_to_descriptor (const TensorInfo &info) |
Convert legacy tensor meta-data to a descriptor. More... | |
float32x4x3_t | load_matrix_row (const float *ptr) |
template<unsigned int stridex> | |
float32x4x2_t | convolve_3x3 (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<> | |
float32x4x2_t | convolve_3x3< 1 > (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<> | |
float32x4x2_t | convolve_3x3< 2 > (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<> | |
float32x4x2_t | convolve_3x3< 3 > (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<unsigned int stridex> | |
void | store_results (float *buffer, const float32x4x2_t &values) |
Stores a float32x4x2_t array into a memory location. More... | |
template<> | |
void | store_results< 1 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | store_results< 2 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | store_results< 3 > (float *buffer, const float32x4x2_t &values) |
template<unsigned int stridex> | |
int | get_input_num_elems_processed (unsigned int num_elems_written_per_iteration) |
template<> | |
int | get_input_num_elems_processed< 1 > (unsigned int num_elems_written_per_iteration) |
template<> | |
int | get_input_num_elems_processed< 2 > (unsigned int num_elems_written_per_iteration) |
template<> | |
int | get_input_num_elems_processed< 3 > (unsigned int num_elems_written_per_iteration) |
float32x4x3_t | load_matrix_row (const float *ptr, int weights_offset=0) |
Loads a 3x3 matrix as a row (float). More... | |
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > | |
int32x4x3_t | load_matrix_row (const T *ptr, int weights_offset=0) |
Loads a 3x3 matrix as a row (uint8_t/int8_t). More... | |
template<unsigned int stridex> | |
void | store_results (int32_t *buffer, const int32x4x2_t &values) |
Stores an int32x4x2_t array into a memory location. More... | |
template<> | |
void | store_results< 1 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | store_results< 2 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | store_results< 3 > (int32_t *buffer, const int32x4x2_t &values) |
template<unsigned int stridex> | |
void | accumulate_results (float *buffer, const float32x4x2_t &values) |
template<> | |
void | accumulate_results< 1 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | accumulate_results< 2 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | accumulate_results< 3 > (float *buffer, const float32x4x2_t &values) |
template<unsigned int stridex> | |
void | accumulate_results (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | accumulate_results< 1 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | accumulate_results< 2 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | accumulate_results< 3 > (int32_t *buffer, const int32x4x2_t &values) |
float32x4_t | single_convolve_3x3_dilation (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, const size_t dilation_x, int input_offset) |
Perform a 3x3 convolution for 4 consecutive elements on float32 when dilation.x() or dilation.y() is not 1. More... | |
float32x4x2_t | convolve_3x3_dilation (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, const size_t dilation_x, unsigned int stridex, int input_offset=0) |
Perform a 3x3 convolution for 8 consecutive elements on float32 when dilation.x() or dilation.y() is not 1. More... | |
template<bool accumulate> | |
void | convolve_3x3 (const float *in_top, const float *in_mid, const float *in_low, float *out_ptr, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, unsigned int stridex, int input_offset=0) |
Perform a convolve3x3 on float32. More... | |
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > | |
int32x4_t | single_convolve_3x3_dilation (const T *in_top, const T *in_mid, const T *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, size_t dilation_x, int32_t input_offset) |
Perform a 3x3 convolution for 4 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1. More... | |
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > | |
int32x4x2_t | convolve_3x3_dilation (const T *in_top, const T *in_mid, const T *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, const size_t dilation_x, unsigned int stridex, int input_offset) |
Perform a 3x3 convolution for 8 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1. More... | |
template<bool accumulate, typename T1 , typename T2 , ARM_COMPUTE_REQUIRES_TA(std::is_same< T1, uint8_t >::value||std::is_same< T1, int8_t >::value) > | |
void | convolve_3x3 (const T1 *in_top, const T1 *in_mid, const T1 *in_low, T2 *out_ptr, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, unsigned int stridex, int32_t input_offset) |
Perform a convolve3x3 on 8-bit elements. More... | |
int | get_input_num_elems_processed (unsigned int num_elems_written_per_iteration, unsigned int stridex) |
Get the number of elements processed on 3x3 convolution. More... |
|
strong |
|
inline |
void arm_compute::detail::accumulate_results | ( | int32_t * | buffer, |
const int32x4x2_t & | values | ||
) |
|
inline |
|
inline |
Definition at line 166 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 151 of file NEDirectConvolutionDetail.h.
Referenced by convolve_3x3().
|
inline |
Definition at line 173 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 157 of file NEDirectConvolutionDetail.h.
Referenced by convolve_3x3().
|
inline |
Definition at line 179 of file NEDirectConvolutionDetail.h.
AclTensorDescriptor convert_to_descriptor | ( | const TensorInfo & | info | ) |
Convert legacy tensor meta-data to a descriptor.
[in] | info | Legacy tensor meta-data |
Definition at line 97 of file LegacySupport.cpp.
References arm_compute::test::validation::info.
Referenced by ITensorV2::get_descriptor().
TensorInfo convert_to_legacy_tensor_info | ( | const AclTensorDescriptor & | desc | ) |
Convert a descriptor to a legacy format one.
[in] | desc | Descriptor to convert |
Definition at line 90 of file LegacySupport.cpp.
References AclTensorDescriptor::data_type, TensorInfo::init(), AclTensorDescriptor::ndims, and AclTensorDescriptor::shape.
Referenced by ClTensor::ClTensor(), and CpuTensor::CpuTensor().
float32x4x2_t arm_compute::detail::convolve_3x3 | ( | const float * | in_top, |
const float * | in_mid, | ||
const float * | in_low, | ||
const float32x4x3_t & | m0, | ||
const float32x4x3_t & | m1, | ||
const float32x4x3_t & | m2 | ||
) |
|
inline |
Perform a convolve3x3 on float32.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[out] | out_ptr | Pointer to the output. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | (Optional) Input quantization offset. |
Definition at line 352 of file NEDirectConvolutionDetail.h.
References arm_compute::test::validation::reference::accumulate(), accumulate_results< 1 >(), accumulate_results< 2 >(), accumulate_results< 3 >(), ARM_COMPUTE_ERROR_ON, ARM_COMPUTE_UNUSED, store_results< 1 >(), store_results< 2 >(), and store_results< 3 >().
void arm_compute::detail::convolve_3x3 | ( | const T1 * | in_top, |
const T1 * | in_mid, | ||
const T1 * | in_low, | ||
T2 * | out_ptr, | ||
const int32x4x3_t & | m0, | ||
const int32x4x3_t & | m1, | ||
const int32x4x3_t & | m2, | ||
unsigned int | stridex, | ||
int32_t | input_offset | ||
) |
Perform a convolve3x3 on 8-bit elements.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[out] | out_ptr | Pointer to the output. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | Input quantization offset. |
Definition at line 593 of file NEDirectConvolutionDetail.h.
References arm_compute::test::validation::reference::accumulate(), accumulate_results< 1 >(), accumulate_results< 2 >(), accumulate_results< 3 >(), ARM_COMPUTE_ERROR_ON, store_results< 1 >(), store_results< 2 >(), store_results< 3 >(), type, arm_compute::wrapper::vaddw(), arm_compute::wrapper::vdup_n(), arm_compute::wrapper::vext_1(), arm_compute::wrapper::vext_2(), arm_compute::wrapper::vgethigh(), arm_compute::wrapper::vgetlane(), arm_compute::wrapper::vgetlow(), arm_compute::wrapper::vload(), arm_compute::wrapper::vmla(), arm_compute::wrapper::vmovl(), arm_compute::wrapper::vreinterpret(), and arm_compute::wrapper::vsetlane().
|
inline |
Definition at line 51 of file NEDirectConvolution3x3.h.
Referenced by convolve_3x3< 2 >(), and convolve_3x3< 3 >().
|
inline |
Definition at line 109 of file NEDirectConvolution3x3.h.
References convolve_3x3< 1 >().
|
inline |
Definition at line 119 of file NEDirectConvolution3x3.h.
References convolve_3x3< 1 >().
|
inline |
Perform a 3x3 convolution for 8 consecutive elements on float32 when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | (Optional) Input quantization offset. |
Definition at line 306 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_ERROR_ON, and single_convolve_3x3_dilation().
|
inline |
Perform a 3x3 convolution for 8 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | Input quantization offset. |
Definition at line 554 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_ERROR_ON, single_convolve_3x3_dilation(), arm_compute::wrapper::vgetlane(), and arm_compute::wrapper::vsetlane().
|
inline |
Definition at line 104 of file Validate.h.
Referenced by arm_compute::error_on_mismatching_dimensions(), and for_each_error().
|
inline |
Definition at line 110 of file Validate.h.
References GemmTuner::args, ARM_COMPUTE_RETURN_ON_ERROR, for_each_error(), and func.
int arm_compute::detail::get_input_num_elems_processed | ( | unsigned int | num_elems_written_per_iteration | ) |
|
inline |
Get the number of elements processed on 3x3 convolution.
[in] | num_elems_written_per_iteration | Number of elements written per iteration on 3x3 convolution. |
[in] | stridex | Stride value in elements across x. |
Definition at line 947 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_ERROR.
int arm_compute::detail::get_input_num_elems_processed< 1 > | ( | unsigned int | num_elems_written_per_iteration | ) |
Definition at line 152 of file NEDirectConvolution3x3.h.
int arm_compute::detail::get_input_num_elems_processed< 2 > | ( | unsigned int | num_elems_written_per_iteration | ) |
Definition at line 158 of file NEDirectConvolution3x3.h.
int arm_compute::detail::get_input_num_elems_processed< 3 > | ( | unsigned int | num_elems_written_per_iteration | ) |
Definition at line 164 of file NEDirectConvolution3x3.h.
|
inline |
Definition at line 47 of file Validate.h.
Referenced by arm_compute::error_on_mismatching_shapes(), arm_compute::error_on_tensors_not_even(), arm_compute::error_on_tensors_not_subsampled(), compare_dimension< T >::operator()(), and NELogicalKernel::validate().
|
inline |
Definition at line 34 of file NEDirectConvolution3x3.h.
|
inline |
Loads a 3x3 matrix as a row (float).
[in] | ptr | Pointer to a float 3x3 matrix. |
[in] | weights_offset | (Optional) Weights quantization offset. |
Definition at line 45 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_UNUSED.
|
inline |
Loads a 3x3 matrix as a row (uint8_t/int8_t).
[in] | ptr | Pointer to a uint8_t/int8_t 3x3 matrix. |
[in] | weights_offset | (Optional) Weights quantization offset. |
Definition at line 67 of file NEDirectConvolutionDetail.h.
|
inline |
Perform a 3x3 convolution for 4 consecutive elements on float32 when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | input_offset | (Optional) Input quantization offset. |
Definition at line 248 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_UNUSED.
Referenced by convolve_3x3_dilation().
|
inline |
Perform a 3x3 convolution for 4 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | input_offset | Input quantization offset. |
Definition at line 466 of file NEDirectConvolutionDetail.h.
References type, arm_compute::wrapper::vaddw(), arm_compute::wrapper::vdup_n(), arm_compute::wrapper::vgetlow(), arm_compute::wrapper::vload(), arm_compute::wrapper::vmla(), arm_compute::wrapper::vmovl(), arm_compute::wrapper::vmul(), and arm_compute::wrapper::vreinterpret().
void arm_compute::detail::store_results | ( | int32_t * | buffer, |
const int32x4x2_t & | values | ||
) |
Stores an int32x4x2_t array into a memory location.
[in] | buffer | Pointer to the memory location where the values will be stored. |
[in] | values | Values that will be stored. |
void store_results | ( | float * | buffer, |
const float32x4x2_t & | values | ||
) |
Stores a float32x4x2_t array into a memory location.
[in] | buffer | Pointer to the memory location where the values will be stored. |
[in] | values | Values that will be stored. |
|
inline |
Definition at line 122 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 130 of file NEDirectConvolution3x3.h.
Referenced by convolve_3x3().
|
inline |
Definition at line 129 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 137 of file NEDirectConvolution3x3.h.
Referenced by convolve_3x3().
|
inline |
Definition at line 135 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 143 of file NEDirectConvolution3x3.h.
Referenced by convolve_3x3().
|
inline |
Check if an internal context is valid.
[in] | ctx | Internal context to check |
Definition at line 143 of file IContext.h.
References ARM_COMPUTE_LOG_ERROR_ACL, arm_compute::InvalidArgument, IContext::is_valid(), and arm_compute::Success.
Referenced by AclCreateQueue(), AclCreateTensor(), AclCreateTensorPack(), AclDestroyContext(), AclGetClContext(), AclGetClDevice(), and AclSetClContext().
|
inline |
Check if an internal TensorPack is valid.
[in] | pack | Internal tensor pack to check |
Definition at line 119 of file TensorPack.h.
References ARM_COMPUTE_LOG_ERROR_ACL, arm_compute::InvalidArgument, TensorPack::is_valid(), and arm_compute::Success.
Referenced by AclDestroyTensorPack(), AclPackTensor(), and AclPackTensors().
|
inline |
Check if an internal queue is valid.
[in] | queue | Internal queue to check |
Definition at line 89 of file IQueue.h.
References ARM_COMPUTE_LOG_ERROR_ACL, arm_compute::InvalidArgument, IQueue::is_valid(), and arm_compute::Success.
Referenced by AclDestroyQueue(), AclGetClQueue(), AclQueueFinish(), and AclSetClQueue().
|
inline |
Check if an internal tensor is valid.
[in] | tensor | Internal tensor to check |
Definition at line 129 of file ITensorV2.h.
References ARM_COMPUTE_LOG_ERROR_ACL, arm_compute::InvalidArgument, ITensorV2::is_valid(), and arm_compute::Success.
Referenced by AclDestroyTensor(), AclGetClMem(), AclMapTensor(), AclTensorImport(), and AclUnmapTensor().