|
float32x4x3_t | load_matrix_row (const float *ptr, int weights_offset=0) |
| Loads a 3x3 matrix as a row (float). More...
|
|
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > |
int32x4x3_t | load_matrix_row (const T *ptr, int weights_offset=0) |
| Loads a 3x3 matrix as a row (uint8_t/int8_t). More...
|
|
template<unsigned int stridex> |
void | store_results (float *buffer, const float32x4x2_t &values) |
| Stores a float32x4x2_t array into a memory location. More...
|
|
template<> |
void | store_results< 1 > (float *buffer, const float32x4x2_t &values) |
|
template<> |
void | store_results< 2 > (float *buffer, const float32x4x2_t &values) |
|
template<> |
void | store_results< 3 > (float *buffer, const float32x4x2_t &values) |
|
template<unsigned int stridex> |
void | store_results (int32_t *buffer, const int32x4x2_t &values) |
| Stores an int32x4x2_t array into a memory location. More...
|
|
template<> |
void | store_results< 1 > (int32_t *buffer, const int32x4x2_t &values) |
|
template<> |
void | store_results< 2 > (int32_t *buffer, const int32x4x2_t &values) |
|
template<> |
void | store_results< 3 > (int32_t *buffer, const int32x4x2_t &values) |
|
template<unsigned int stridex> |
void | accumulate_results (float *buffer, const float32x4x2_t &values) |
|
template<> |
void | accumulate_results< 1 > (float *buffer, const float32x4x2_t &values) |
|
template<> |
void | accumulate_results< 2 > (float *buffer, const float32x4x2_t &values) |
|
template<> |
void | accumulate_results< 3 > (float *buffer, const float32x4x2_t &values) |
|
template<unsigned int stridex> |
void | accumulate_results (int32_t *buffer, const int32x4x2_t &values) |
|
template<> |
void | accumulate_results< 1 > (int32_t *buffer, const int32x4x2_t &values) |
|
template<> |
void | accumulate_results< 2 > (int32_t *buffer, const int32x4x2_t &values) |
|
template<> |
void | accumulate_results< 3 > (int32_t *buffer, const int32x4x2_t &values) |
|
float32x4_t | single_convolve_3x3_dilation (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, const size_t dilation_x, int input_offset) |
| Perform a 3x3 convolution for 4 consecutive elements on float32 when dilation.x() or dilation.y() is not 1. More...
|
|
float32x4x2_t | convolve_3x3_dilation (const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, const size_t dilation_x, unsigned int stridex, int input_offset=0) |
| Perform a 3x3 convolution for 8 consecutive elements on float32 when dilation.x() or dilation.y() is not 1. More...
|
|
template<bool accumulate> |
void | convolve_3x3 (const float *in_top, const float *in_mid, const float *in_low, float *out_ptr, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, unsigned int stridex, int input_offset=0) |
| Perform a convolve3x3 on float32. More...
|
|
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > |
int32x4_t | single_convolve_3x3_dilation (const T *in_top, const T *in_mid, const T *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, size_t dilation_x, int32_t input_offset) |
| Perform a 3x3 convolution for 4 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1. More...
|
|
template<typename T , ARM_COMPUTE_REQUIRES_TA(std::is_same< T, uint8_t >::value||std::is_same< T, int8_t >::value) > |
int32x4x2_t | convolve_3x3_dilation (const T *in_top, const T *in_mid, const T *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, const size_t dilation_x, unsigned int stridex, int input_offset) |
| Perform a 3x3 convolution for 8 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1. More...
|
|
template<bool accumulate, typename T1 , typename T2 , ARM_COMPUTE_REQUIRES_TA(std::is_same< T1, uint8_t >::value||std::is_same< T1, int8_t >::value) > |
void | convolve_3x3 (const T1 *in_top, const T1 *in_mid, const T1 *in_low, T2 *out_ptr, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, unsigned int stridex, int32_t input_offset) |
| Perform a convolve3x3 on 8-bit elements. More...
|
|
int | get_input_num_elems_processed (unsigned int num_elems_written_per_iteration, unsigned int stridex) |
| Get the number of input elements processed, given the number of elements written per iteration and the stride along x. More...
|
|