21.02
|
Data Structures | |
struct | brelu |
Bounded RELU activation object. More... | |
class | compare_dimension |
Function to compare two Dimensions objects and throw an error on mismatch. More... | |
struct | dummy |
Dummy activation object. More... | |
struct | get_tensor_info_t |
Get the info for a tensor, dummy struct. More... | |
struct | get_tensor_info_t< ITensorInfo * > |
Get the info for a tensor. More... | |
struct | linear |
Linear activation object. More... | |
struct | logistic |
Logistic activation object. More... | |
struct | lubrelu |
Lower-Upper Bounded RELU activation object. More... | |
struct | relu |
RELU activation object. More... | |
struct | square |
Square activation object. More... | |
Functions | |
template<typename T > | |
bool | have_different_dimensions (const Dimensions< T > &dim1, const Dimensions< T > &dim2, unsigned int upper_dim) |
template<typename F > | |
arm_compute::Status | for_each_error (F &&) |
template<typename F , typename T , typename... Ts> | |
arm_compute::Status | for_each_error (F &&func, T &&arg, Ts &&... args) |
float32x4x3_t | load_matrix_row (const float *ptr) |
template<unsigned int stridex> | |
float32x4x2_t | convolve_3x3 (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<> | |
float32x4x2_t | convolve_3x3< 1 > (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<> | |
float32x4x2_t | convolve_3x3< 2 > (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<> | |
float32x4x2_t | convolve_3x3< 3 > (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) |
template<unsigned int stridex> | |
void | store_results (float *buffer, const float32x4x2_t &values) |
Stores a float32x4x2_t array into a memory location. More... | |
template<> | |
void | store_results< 1 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | store_results< 2 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | store_results< 3 > (float *buffer, const float32x4x2_t &values) |
template<unsigned int stridex> | |
int | get_input_num_elems_processed (unsigned int num_elems_written_per_iteration) |
template<> | |
int | get_input_num_elems_processed< 1 > (unsigned int num_elems_written_per_iteration) |
template<> | |
int | get_input_num_elems_processed< 2 > (unsigned int num_elems_written_per_iteration) |
template<> | |
int | get_input_num_elems_processed< 3 > (unsigned int num_elems_written_per_iteration) |
float32x4x3_t | load_matrix_row (const float *ptr, int weights_offset=0) |
Loads a 3x3 matrix as a row (float). More... | |
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > | |
int32x4x3_t | load_matrix_row (const T *ptr, int weights_offset=0) |
Loads a 3x3 matrix as a row (uint8_t/int8_t). More... | |
template<unsigned int stridex> | |
void | store_results (int32_t *buffer, const int32x4x2_t &values) |
Stores an int32x4x2_t array into a memory location. More... | |
template<> | |
void | store_results< 1 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | store_results< 2 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | store_results< 3 > (int32_t *buffer, const int32x4x2_t &values) |
template<unsigned int stridex> | |
void | accumulate_results (float *buffer, const float32x4x2_t &values) |
template<> | |
void | accumulate_results< 1 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | accumulate_results< 2 > (float *buffer, const float32x4x2_t &values) |
template<> | |
void | accumulate_results< 3 > (float *buffer, const float32x4x2_t &values) |
template<unsigned int stridex> | |
void | accumulate_results (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | accumulate_results< 1 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | accumulate_results< 2 > (int32_t *buffer, const int32x4x2_t &values) |
template<> | |
void | accumulate_results< 3 > (int32_t *buffer, const int32x4x2_t &values) |
float32x4_t | single_convolve_3x3_dilation (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, const size_t dilation_x, int input_offset) |
Perform a 3x3 convolution for 4 consecutive elements on float32 when dilation.x() or dilation.y() is not 1. More... | |
float32x4x2_t | convolve_3x3_dilation (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, const size_t dilation_x, unsigned int stridex, int input_offset=0) |
Perform a 3x3 convolution for 8 consecutive elements on float32 when dilation.x() or dilation.y() is not 1. More... | |
template<bool accumulate> | |
void | convolve_3x3 (const float *in_top, const float *in_mid, const float *in_low, float *out_ptr, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, unsigned int stridex, int input_offset=0) |
Perform a convolve3x3 on float32. More... | |
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > | |
int32x4_t | single_convolve_3x3_dilation (const T *in_top, const T *in_mid, const T *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, size_t dilation_x, int32_t input_offset) |
Perform a 3x3 convolution for 4 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1. More... | |
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > | |
int32x4x2_t | convolve_3x3_dilation (const T *in_top, const T *in_mid, const T *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, const size_t dilation_x, unsigned int stridex, int input_offset) |
Perform a 3x3 convolution for 8 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1. More... | |
template<bool accumulate, typename T1 , typename T2 , ARM_COMPUTE_REQUIRES_TA(std::is_same< T1, uint8_t >::value||std::is_same< T1, int8_t >::value) > | |
void | convolve_3x3 (const T1 *in_top, const T1 *in_mid, const T1 *in_low, T2 *out_ptr, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, unsigned int stridex, int32_t input_offset) |
Perform a convolve3x3 on 8-bit elements. More... | |
int | get_input_num_elems_processed (unsigned int num_elems_written_per_iteration, unsigned int stridex) |
Get the number of elements processed on 3x3 convolution. More... | |
|
inline |
Referenced by accumulate_results< 3 >(), and store_results< 3 >().
void arm_compute::detail::accumulate_results | ( | int32_t * | buffer, |
const int32x4x2_t & | values | ||
) |
|
inline |
Definition at line 145 of file NEDirectConvolutionDetail.h.
Referenced by accumulate_results< 3 >(), and convolve_3x3().
|
inline |
Definition at line 167 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 152 of file NEDirectConvolutionDetail.h.
Referenced by accumulate_results< 3 >(), and convolve_3x3().
|
inline |
Definition at line 174 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 158 of file NEDirectConvolutionDetail.h.
References accumulate_results().
Referenced by accumulate_results< 3 >(), and convolve_3x3().
|
inline |
Definition at line 180 of file NEDirectConvolutionDetail.h.
References accumulate_results(), accumulate_results< 1 >(), accumulate_results< 2 >(), accumulate_results< 3 >(), store_results(), store_results< 1 >(), store_results< 2 >(), store_results< 3 >(), vadd_f16(), and vaddq_f16().
float32x4x2_t arm_compute::detail::convolve_3x3 | ( | const float * | in_top, |
const float * | in_mid, | ||
const float * | in_low, | ||
const float32x4x3_t & | m0, | ||
const float32x4x3_t & | m1, | ||
const float32x4x3_t & | m2 | ||
) |
Referenced by convolve_3x3_dilation(), and load_matrix_row().
|
inline |
Perform a convolve3x3 on float32.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[out] | out_ptr | Pointer to the output. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | (Optional) Input quantization offset. |
Definition at line 353 of file NEDirectConvolutionDetail.h.
References accumulate(), accumulate_results< 1 >(), accumulate_results< 2 >(), accumulate_results< 3 >(), ARM_COMPUTE_ERROR_ON, ARM_COMPUTE_UNUSED, store_results< 1 >(), store_results< 2 >(), and store_results< 3 >().
void arm_compute::detail::convolve_3x3 | ( | const T1 * | in_top, |
const T1 * | in_mid, | ||
const T1 * | in_low, | ||
T2 * | out_ptr, | ||
const int32x4x3_t & | m0, | ||
const int32x4x3_t & | m1, | ||
const int32x4x3_t & | m2, | ||
unsigned int | stridex, | ||
int32_t | input_offset | ||
) |
Perform a convolve3x3 on 8-bit elements.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[out] | out_ptr | Pointer to the output. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | Input quantization offset. |
Definition at line 594 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_ERROR_ON.
|
inline |
Definition at line 51 of file NEDirectConvolution3x3.h.
Referenced by convolve_3x3< 2 >(), and convolve_3x3< 3 >().
|
inline |
Definition at line 109 of file NEDirectConvolution3x3.h.
References convolve_3x3< 1 >().
|
inline |
Definition at line 119 of file NEDirectConvolution3x3.h.
References convolve_3x3< 1 >(), and store_results().
|
inline |
Perform a 3x3 convolution for 8 consecutive elements on float32 when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | (Optional) Input quantization offset. |
Definition at line 307 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_ERROR_ON, convolve_3x3(), and single_convolve_3x3_dilation().
|
inline |
Perform a 3x3 convolution for 8 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | stridex | Stride value in elements across x. |
[in] | input_offset | Input quantization offset. |
Definition at line 555 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_ERROR_ON, single_convolve_3x3_dilation(), arm_compute::wrapper::vgetlane(), and arm_compute::wrapper::vsetlane().
|
inline |
Definition at line 108 of file Validate.h.
Referenced by arm_compute::error_on_mismatching_dimensions(), and for_each_error().
|
inline |
Definition at line 114 of file Validate.h.
References GemmTuner::args, ARM_COMPUTE_RETURN_ON_ERROR, for_each_error(), and func.
int arm_compute::detail::get_input_num_elems_processed | ( | unsigned int | num_elems_written_per_iteration | ) |
Referenced by store_results< 3 >().
|
inline |
Get the number of elements processed on 3x3 convolution.
[in] | num_elems_written_per_iteration | Number of elements written per iteration on 3x3 convolution. |
[in] | stridex | Stride value in elements across x. |
Definition at line 948 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_ERROR.
int arm_compute::detail::get_input_num_elems_processed< 1 > | ( | unsigned int | num_elems_written_per_iteration | ) |
Definition at line 152 of file NEDirectConvolution3x3.h.
int arm_compute::detail::get_input_num_elems_processed< 2 > | ( | unsigned int | num_elems_written_per_iteration | ) |
Definition at line 158 of file NEDirectConvolution3x3.h.
int arm_compute::detail::get_input_num_elems_processed< 3 > | ( | unsigned int | num_elems_written_per_iteration | ) |
Definition at line 164 of file NEDirectConvolution3x3.h.
|
inline |
Definition at line 51 of file Validate.h.
Referenced by CLPixelWiseMultiplicationKernel::border_size(), arm_compute::error_on_mismatching_shapes(), arm_compute::error_on_tensors_not_even(), arm_compute::error_on_tensors_not_subsampled(), compare_dimension< T >::operator()(), NEPixelWiseMultiplicationKernel::run_op(), and NELogicalKernel::validate().
|
inline |
Definition at line 34 of file NEDirectConvolution3x3.h.
References convolve_3x3().
|
inline |
Loads a 3x3 matrix as a row (float).
[in] | ptr | Pointer to a float 3x3 matrix. |
[in] | weights_offset | (Optional) Weights quantization offset. |
Definition at line 46 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_UNUSED.
|
inline |
Loads a 3x3 matrix as a row (uint8_t/int8_t).
[in] | ptr | Pointer to a uint8_t/int8_t 3x3 matrix. |
[in] | weights_offset | (Optional) Weights quantization offset. |
Definition at line 68 of file NEDirectConvolutionDetail.h.
References store_results(), store_results< 1 >(), store_results< 2 >(), and store_results< 3 >().
|
inline |
Perform a 3x3 convolution for 4 consecutive elements on float32 when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | input_offset | (Optional) Input quantization offset. |
Definition at line 249 of file NEDirectConvolutionDetail.h.
References ARM_COMPUTE_UNUSED.
Referenced by convolve_3x3_dilation().
|
inline |
Perform a 3x3 convolution for 4 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1.
[in] | in_top | Pointer to the first row of the input. |
[in] | in_mid | Pointer to the second row of the input. |
[in] | in_low | Pointer to the third row of the input. |
[in] | m0 | First row of the filter. |
[in] | m1 | Second row of the filter. |
[in] | m2 | Third row of the filter. |
[in] | dilation_x | Dilation, in elements across x. |
[in] | input_offset | Input quantization offset. |
Definition at line 467 of file NEDirectConvolutionDetail.h.
void arm_compute::detail::store_results | ( | int32_t * | buffer, |
const int32x4x2_t & | values | ||
) |
Stores an int32x4x2_t array into a memory location.
[in] | buffer | Pointer to the memory location where the values will be stored. |
[in] | values | Values that will be stored. |
void store_results | ( | float * | buffer, |
const float32x4x2_t & | values | ||
) |
Stores a float32x4x2_t array into a memory location.
[in] | buffer | Pointer to the memory location where the values will be stored. |
[in] | values | Values that will be stored. |
Referenced by accumulate_results< 3 >(), NEConvolutionKernel< matrix_size >::configure(), convolve_3x3< 3 >(), load_matrix_row(), NESeparableConvolutionVertKernel< matrix_size >::run(), and NEConvolutionRectangleKernel::run().
|
inline |
Definition at line 123 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 130 of file NEDirectConvolution3x3.h.
Referenced by accumulate_results< 3 >(), convolve_3x3(), and load_matrix_row().
|
inline |
Definition at line 130 of file NEDirectConvolutionDetail.h.
|
inline |
Definition at line 137 of file NEDirectConvolution3x3.h.
Referenced by accumulate_results< 3 >(), convolve_3x3(), and load_matrix_row().
|
inline |
Definition at line 136 of file NEDirectConvolutionDetail.h.
References accumulate_results().
|
inline |
Definition at line 143 of file NEDirectConvolution3x3.h.
References get_input_num_elems_processed().
Referenced by accumulate_results< 3 >(), convolve_3x3(), and load_matrix_row().