Compute Library
 23.11
arm_conv::depthwise Namespace Reference

Namespaces

 depthfirst_multiplier
 
 depthwise_depthfirst
 
 interleaves
 

Data Structures

struct  DefaultOutputStage
 
struct  DefaultOutputStage< int8_t >
 
struct  DefaultOutputStage< uint8_t >
 
struct  DefaultTAccum
 
struct  DefaultTAccum< int8_t >
 
struct  DefaultTAccum< uint8_t >
 
class  DepthfirstDriver
 
class  DepthfirstMultiplierStrategy
 
class  DepthfirstMultiplierStrategy< TInput, TWeight, TOutput, int32_t >
 
class  DepthfirstStrategy
 
class  DepthfirstStrategyUntyped
 
class  DepthwiseDepthfirst
 
class  DepthwiseDepthfirstCommon
 
class  DepthwiseDepthfirstGeneric
 
struct  DepthwiseDepthfirstGenericKernelCall
 
struct  DepthwiseDepthfirstGenericKernelCall< arm_gemm::Requantize32 >
 
struct  DepthwiseDepthfirstGenericKernelCall< Nothing >
 
class  DepthwiseDepthfirstMultiplier
 
class  DepthwiseDepthfirstStrategy
 
class  DepthwiseDepthfirstStrategy< TInput, TWeight, TOutput, int32_t >
 
class  DepthwiseDepthfirstStrategyCommon
 
struct  DepthwiseImplementation
 
class  DepthwisePlanar
 
class  GenericDepthfirstKernelStrategy
 
struct  GenericDepthfirstKernelStrategyFunctionType
 
struct  GenericDepthfirstKernelStrategyFunctionType< TInput, TOutput, int32_t >
 
class  GenericDepthfirstMultiplierKernelStrategy
 
class  GenericDepthfirstMultiplierKernelStrategy< TInput, TWeight, TOutput, int32_t >
 
class  GenericDepthfirstMultiplierStrategy
 
class  GenericDepthfirstStrategy
 
class  GenericInputArrayElement
 
class  IDepthfirstStrategy
 
struct  interleave_a64_s8q_3x3_dot
 
struct  interleave_a64_u8q_3x3_dot
 
class  IPlanarStrategy
 
struct  PlanarKernelType
 
struct  PlanarKernelType< TInput, TWeight, TOutput, int32_t, arm_gemm::Requantize32 >
 
struct  PlanarKernelType< TInput, TWeight, TOutput, TAccum, Nothing >
 
class  PlanarStrategy
 
class  sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst
 
class  sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst
 
class  sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst
 
class  sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst
 
class  sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst
 
class  sme2_fp32_planar_3x3_s1_4rows_mla_za
 
class  sme2_fp32_planar_3x3_s2_4rows_mla_za
 
class  sme2_fp32_planar_5x5_s1_4rows_mla_za
 
class  sme2_fp32_planar_5x5_s2_4rows_mla_za
 
class  sme2_fp32bf16fp32_planar_3x3_s1_4rows_dot_za
 
class  sme2_fp32bf16fp32_planar_3x3_s2_4rows_dot_za
 
class  sme2_fp32bf16fp32_planar_5x5_s1_4rows_dot_za
 
class  sme2_fp32bf16fp32_planar_5x5_s2_4rows_dot_za
 
class  sme2_s8q_planar_3x3_s1_4rows_dot_za
 
class  sme2_s8q_planar_3x3_s2_4rows_dot_za
 
class  sme2_s8q_planar_5x5_s1_4rows_dot_za
 
class  sme2_s8q_planar_5x5_s2_4rows_dot_za
 
class  sme2_u8q_planar_3x3_s1_4rows_dot_za
 
class  sme2_u8q_planar_3x3_s2_4rows_dot_za
 
class  sme2_u8q_planar_5x5_s1_4rows_dot_za
 
class  sme2_u8q_planar_5x5_s2_4rows_dot_za
 
class  sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za
 
class  sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za
 
class  sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za
 
class  sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za
 
struct  TensorSpec
 

Functions

std::tuple< size_t, size_t, size_t, size_t, size_t > get_reduced_view_for_dilation (size_t out_size, size_t in_size, const size_t d, const size_t dilation_factor, const size_t kernel_size, const size_t stride, const size_t orig_pad_before)
 
template<>
const DepthwiseImplementation< float > * depthwise_implementation_list ()
 
template UniqueDepthwiseCommon< float > depthwise (const DepthwiseArgs &, const Nothing &)
 
template std::vector< KernelDescription > get_compatible_kernels< float > (const DepthwiseArgs &, const Nothing &)
 
template<typename TInput , typename TWeight = TInput, typename TOutput = TInput, class OutputStage = Nothing>
bool find_implementation (const DepthwiseArgs &args, const OutputStage &os, const DepthwiseImplementation< TInput, TWeight, TOutput, OutputStage > *&selected)
 
template<typename TInput , typename TWeight , typename TOutput , class OutputStage >
std::vector< KernelDescription > get_compatible_kernels (const DepthwiseArgs &args, const OutputStage &os)
 
template<typename TInput , typename TWeight , typename TOutput , class OutputStage >
UniqueDepthwiseCommon< TInput, TWeight, TOutput > depthwise (const DepthwiseArgs &args, const OutputStage &os)
 
template UniqueDepthwiseCommon< int8_t, int8_t, int8_t > depthwise (const DepthwiseArgs &, const Requantize32 &)
 
template std::vector< KernelDescription > get_compatible_kernels< int8_t, int8_t, int8_t, Requantize32 > (const DepthwiseArgs &, const Requantize32 &)
 
template std::vector< KernelDescription > get_compatible_kernels< uint8_t, uint8_t, uint8_t, Requantize32 > (const DepthwiseArgs &, const Requantize32 &)
 
template std::vector< KernelDescription > get_compatible_kernels< uint8_t, int8_t, uint8_t, Requantize32 > (const DepthwiseArgs &, const Requantize32 &)
 
void sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst_indirect_impl (const __fp16 *const *const input_ptrs, __fp16 *const *const outptrs, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max)
 
void sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst_direct_impl (const unsigned int n_tile_rows, const unsigned int n_tile_cols, const __fp16 *inptr, int64_t ld_input_row, int64_t ld_input_col, __fp16 *outptr, int64_t ld_output_row, int64_t ld_output_col, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max)
 
void sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst_indirect_impl (const __fp16 *const *const input_ptrs, __fp16 *const *const outptrs, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max)
 
void sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl (const unsigned int n_tile_rows, const unsigned int n_tile_cols, const __fp16 *inptr, int64_t ld_input_row, int64_t ld_input_col, __fp16 *outptr, int64_t ld_output_row, int64_t ld_output_col, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max)
 
void sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl (const __fp16 *const *const input_ptrs, __fp16 *const *const outptrs, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max)
 
void sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl (const unsigned int n_tile_rows, const unsigned int n_tile_cols, const __fp16 *inptr, int64_t ld_input_row, int64_t ld_input_col, __fp16 *outptr, int64_t ld_output_row, int64_t ld_output_col, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max)
 
void sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_indirect_impl (const __fp16 *const *const input_ptrs, __fp16 *const *const outptrs, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max)
 
void sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_direct_impl (const unsigned int n_tile_rows, const unsigned int n_tile_cols, const __fp16 *inptr, int64_t ld_input_row, int64_t ld_input_col, __fp16 *outptr, int64_t ld_output_row, int64_t ld_output_col, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max)
 
void sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_indirect_impl (const __fp16 *const *const input_ptrs, __fp16 *const *const outptrs, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max)
 
void sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_direct_impl (const unsigned int n_tile_rows, const unsigned int n_tile_cols, const __fp16 *inptr, int64_t ld_input_row, int64_t ld_input_col, __fp16 *outptr, int64_t ld_output_row, int64_t ld_output_col, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max)
 
void sme2_fp32_planar_3x3_s1_4rows_mla_za_impl (const float *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const float *weights, const float *bias, float **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, float act_min, float act_max)
 
void sme2_fp32_planar_3x3_s2_4rows_mla_za_impl (const float *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const float *weights, const float *bias, float **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, float act_min, float act_max)
 
void sme2_fp32_planar_5x5_s1_4rows_mla_za_impl (const float *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const float *weights, const float *bias, float **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, float act_min, float act_max)
 
void sme2_fp32_planar_5x5_s2_4rows_mla_za_impl (const float *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const float *weights, const float *bias, float **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, float act_min, float act_max)
 
void sme2_fp32bf16fp32_planar_3x3_s1_4rows_dot_za_impl (const float *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const float *weights, const float *bias, float **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, float act_min, float act_max)
 
void sme2_fp32bf16fp32_planar_3x3_s2_4rows_dot_za_impl (const float *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const float *weights, const float *bias, float **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, float act_min, float act_max)
 
void sme2_fp32bf16fp32_planar_5x5_s1_4rows_dot_za_impl (const float *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const float *weights, const float *bias, float **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, float act_min, float act_max)
 
void sme2_fp32bf16fp32_planar_5x5_s2_4rows_dot_za_impl (const float *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const float *weights, const float *bias, float **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, float act_min, float act_max)
 
void sme2_s8q_planar_3x3_s1_4rows_dot_za_impl (const int8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const int8_t *weights, int8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_s8q_planar_3x3_s2_4rows_dot_za_impl (const int8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const int8_t *weights, int8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_s8q_planar_5x5_s1_4rows_dot_za_impl (const int8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const int8_t *weights, int8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_s8q_planar_5x5_s2_4rows_dot_za_impl (const int8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const int8_t *weights, int8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_u8q_planar_3x3_s1_4rows_dot_za_impl (const uint8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const uint8_t *weights, uint8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_u8q_planar_3x3_s2_4rows_dot_za_impl (const uint8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const uint8_t *weights, uint8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_u8q_planar_5x5_s1_4rows_dot_za_impl (const uint8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const uint8_t *weights, uint8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_u8q_planar_5x5_s2_4rows_dot_za_impl (const uint8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const uint8_t *weights, uint8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za_impl (const uint8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const int8_t *weights, uint8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za_impl (const uint8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const int8_t *weights, uint8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za_impl (const uint8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const int8_t *weights, uint8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 
void sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za_impl (const uint8_t *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_in_vl, unsigned int pad_top, unsigned int valid_input_rows, unsigned int pad_left, unsigned int valid_input_cols, const int8_t *weights, uint8_t **outptrs, const size_t *outlds, const size_t *outvllds, unsigned int output_cols, unsigned int start_channel, unsigned int valid_channels, const arm_gemm::Requantize32 &qp)
 

Function Documentation

◆ depthwise() [1/3]

template UniqueDepthwiseCommon<float> arm_conv::depthwise::depthwise ( const DepthwiseArgs &  ,
const Nothing &  
)

◆ depthwise() [2/3]

template UniqueDepthwiseCommon< uint8_t, int8_t, uint8_t > depthwise ( const DepthwiseArgs &  ,
const Requantize32 &  
)

◆ depthwise() [3/3]

UniqueDepthwiseCommon<TInput, TWeight, TOutput> arm_conv::depthwise::depthwise ( const DepthwiseArgs &  args,
const OutputStage &  os 
)

Definition at line 140 of file depthwise_implementation.hpp.

141 {
142  const DepthwiseImplementation<TInput, TWeight, TOutput, OutputStage> *impl = nullptr;
143  const bool success = find_implementation<TInput, TWeight, TOutput, OutputStage>(args, os, impl);
144  return UniqueDepthwiseCommon<TInput, TWeight, TOutput>(success ? impl->get_instance(args, os) : nullptr);
145 }

References GemmTuner::args, and DepthwiseImplementation< TInput, TWeight, TOutput, OutputStage >::get_instance().

◆ depthwise_implementation_list()

template<> const DepthwiseImplementation< float > * depthwise_implementation_list ( )
related

Definition at line 530 of file depthwise_fp32.cpp.

531 {
532  return depthwise_fp32_methods;
533 }

◆ find_implementation()

bool arm_conv::depthwise::find_implementation ( const DepthwiseArgs &  args,
const OutputStage &  os,
const DepthwiseImplementation< TInput, TWeight, TOutput, OutputStage > *&  selected 
)

Definition at line 71 of file depthwise_implementation.hpp.

76 {
77  selected = nullptr;
78  uint64_t best_cycle_estimate = UINT64_MAX;
79 
80  const auto *impl = depthwise_implementation_list<TInput, TWeight, TOutput, OutputStage>();
81  for (; impl->method != DepthwiseMethod::DEFAULT; impl++)
82  {
83  const bool has_cfg = (args.config != nullptr);
84  const auto &cfg = args.config;
85 
86  if (
87  !impl->get_is_supported(args, os) || // Problem is unsupported
88  (has_cfg && cfg->method != DepthwiseMethod::DEFAULT && cfg->method != impl->method) ||
89  (has_cfg && cfg->filter != "" && !std::strstr(impl->name, cfg->filter.c_str()))
90  )
91  {
92  continue;
93  }
94 
95  const auto cycle_estimate = impl->get_cycle_estimate(args, os);
96 
97  if (cycle_estimate == 0)
98  {
99  selected = impl;
100  break;
101  }
102 
103  if (selected == nullptr || cycle_estimate < best_cycle_estimate)
104  {
105  selected = impl;
106  best_cycle_estimate = cycle_estimate;
107  }
108  }
109 
110  return (selected != nullptr);
111 }

References GemmTuner::args.

◆ get_compatible_kernels()

std::vector<KernelDescription> arm_conv::depthwise::get_compatible_kernels ( const DepthwiseArgs &  args,
const OutputStage &  os 
)

Definition at line 114 of file depthwise_implementation.hpp.

115 {
116  std::vector<KernelDescription> kerns;
117 
118  // Find the default implementation so we can flag it accordingly
119  const DepthwiseImplementation<TInput, TWeight, TOutput, OutputStage> *default_impl;
120  find_implementation<TInput, TWeight, TOutput, OutputStage>(args, os, default_impl);
121 
122  for (auto impl = depthwise_implementation_list<TInput, TWeight, TOutput, OutputStage>();
123  impl->method != DepthwiseMethod::DEFAULT; impl++)
124  {
125  if (!impl->get_is_supported(args, os))
126  {
127  continue;
128  }
129 
130  kerns.emplace_back(
131  impl->method, impl->name, impl == default_impl,
132  impl->get_cycle_estimate(args, os)
133  );
134  }
135 
136  return kerns;
137 }

References GemmTuner::args, and DepthwiseImplementation< TInput, TWeight, TOutput, OutputStage >::get_cycle_estimate().

◆ get_compatible_kernels< float >()

template std::vector<KernelDescription> arm_conv::depthwise::get_compatible_kernels< float > ( const DepthwiseArgs &  ,
const Nothing &  
)

◆ get_compatible_kernels< int8_t, int8_t, int8_t, Requantize32 >()

template std::vector<KernelDescription> arm_conv::depthwise::get_compatible_kernels< int8_t, int8_t, int8_t, Requantize32 > ( const DepthwiseArgs &  ,
const Requantize32 &  
)

◆ get_compatible_kernels< uint8_t, int8_t, uint8_t, Requantize32 >()

template std::vector<KernelDescription> arm_conv::depthwise::get_compatible_kernels< uint8_t, int8_t, uint8_t, Requantize32 > ( const DepthwiseArgs &  ,
const Requantize32 &  
)

◆ get_compatible_kernels< uint8_t, uint8_t, uint8_t, Requantize32 >()

template std::vector<KernelDescription> arm_conv::depthwise::get_compatible_kernels< uint8_t, uint8_t, uint8_t, Requantize32 > ( const DepthwiseArgs &  ,
const Requantize32 &  
)

◆ get_reduced_view_for_dilation()

std::tuple<size_t, size_t, size_t, size_t, size_t> arm_conv::depthwise::get_reduced_view_for_dilation ( size_t  out_size,
size_t  in_size,
const size_t  d,
const size_t  dilation_factor,
const size_t  kernel_size,
const size_t  stride,
const size_t  orig_pad_before 
)

Definition at line 35 of file depthwise_common.cpp.

38  {
39  // Get the valid output range
40  out_size = iceildiv(out_size - d, dilation_factor);
41 
42  // Compute the start offset and the amount of padding which applies to this
43  // portion of the work.
44  size_t start_pos = d * stride, pad_before = 0;
45  if (start_pos < orig_pad_before) {
46  pad_before = iceildiv(orig_pad_before - start_pos, dilation_factor);
47  }
48  start_pos += pad_before * dilation_factor - orig_pad_before;
49 
50  // Hence compute the valid input range
51  in_size = start_pos < in_size
52  ? iceildiv(in_size - start_pos, dilation_factor)
53  : 0;
54 
55  // Finally, compute the "after" padding
56  const size_t reqd_input = (out_size - 1) * stride + kernel_size;
57  size_t pad_after = 0;
58  if (reqd_input > (pad_before + in_size)) {
59  pad_after = reqd_input - (pad_before + in_size);
60  }
61 
62  return std::make_tuple(out_size, in_size, start_pos, pad_before, pad_after);
63 }

References arm_gemm::iceildiv().

◆ sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst_direct_impl()

void arm_conv::depthwise::sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst_direct_impl ( const unsigned int  n_tile_rows,
const unsigned int  n_tile_cols,
const __fp16 *  inptr,
int64_t  ld_input_row,
int64_t  ld_input_col,
__fp16 *  outptr,
int64_t  ld_output_row,
int64_t  ld_output_col,
const void *  params,
unsigned int  n_channels,
const __fp16  activation_min,
const __fp16  activation_max 
)

◆ sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst_indirect_impl()

void arm_conv::depthwise::sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst_indirect_impl ( const __fp16 *const *const  input_ptrs,
__fp16 *const *const  outptrs,
const void *  params,
unsigned int  n_channels,
const __fp16  activation_min,
const __fp16  activation_max 
)

◆ sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl()

void arm_conv::depthwise::sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst_direct_impl ( const unsigned int  n_tile_rows,
const unsigned int  n_tile_cols,
const __fp16 *  inptr,
int64_t  ld_input_row,
int64_t  ld_input_col,
__fp16 *  outptr,
int64_t  ld_output_row,
int64_t  ld_output_col,
const void *  params,
unsigned int  n_channels,
const __fp16  activation_min,
const __fp16  activation_max 
)

◆ sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst_indirect_impl()

void arm_conv::depthwise::sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst_indirect_impl ( const __fp16 *const *const  input_ptrs,
__fp16 *const *const  outptrs,
const void *  params,
unsigned int  n_channels,
const __fp16  activation_min,
const __fp16  activation_max 
)

◆ sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl()

void arm_conv::depthwise::sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl ( const unsigned int  n_tile_rows,
const unsigned int  n_tile_cols,
const __fp16 *  inptr,
int64_t  ld_input_row,
int64_t  ld_input_col,
__fp16 *  outptr,
int64_t  ld_output_row,
int64_t  ld_output_col,
const void *  params,
unsigned int  n_channels,
const __fp16  activation_min,
const __fp16  activation_max 
)

◆ sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl()

void arm_conv::depthwise::sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl ( const __fp16 *const *const  input_ptrs,
__fp16 *const *const  outptrs,
const void *  params,
unsigned int  n_channels,
const __fp16  activation_min,
const __fp16  activation_max 
)

◆ sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_direct_impl()

void arm_conv::depthwise::sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_direct_impl ( const unsigned int  n_tile_rows,
const unsigned int  n_tile_cols,
const __fp16 *  inptr,
int64_t  ld_input_row,
int64_t  ld_input_col,
__fp16 *  outptr,
int64_t  ld_output_row,
int64_t  ld_output_col,
const void *  params,
unsigned int  n_channels,
const __fp16  activation_min,
const __fp16  activation_max 
)

◆ sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_indirect_impl()

void arm_conv::depthwise::sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_indirect_impl ( const __fp16 *const *const  input_ptrs,
__fp16 *const *const  outptrs,
const void *  params,
unsigned int  n_channels,
const __fp16  activation_min,
const __fp16  activation_max 
)

◆ sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_direct_impl()

void arm_conv::depthwise::sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_direct_impl ( const unsigned int  n_tile_rows,
const unsigned int  n_tile_cols,
const __fp16 *  inptr,
int64_t  ld_input_row,
int64_t  ld_input_col,
__fp16 *  outptr,
int64_t  ld_output_row,
int64_t  ld_output_col,
const void *  params,
unsigned int  n_channels,
const __fp16  activation_min,
const __fp16  activation_max 
)

◆ sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_indirect_impl()

void arm_conv::depthwise::sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_indirect_impl ( const __fp16 *const *const  input_ptrs,
__fp16 *const *const  outptrs,
const void *  params,
unsigned int  n_channels,
const __fp16  activation_min,
const __fp16  activation_max 
)

◆ sme2_fp32_planar_3x3_s1_4rows_mla_za_impl()

void arm_conv::depthwise::sme2_fp32_planar_3x3_s1_4rows_mla_za_impl ( const float *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const float *  weights,
const float *  bias,
float **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
float  act_min,
float  act_max 
)

◆ sme2_fp32_planar_3x3_s2_4rows_mla_za_impl()

void arm_conv::depthwise::sme2_fp32_planar_3x3_s2_4rows_mla_za_impl ( const float *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const float *  weights,
const float *  bias,
float **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
float  act_min,
float  act_max 
)

◆ sme2_fp32_planar_5x5_s1_4rows_mla_za_impl()

void arm_conv::depthwise::sme2_fp32_planar_5x5_s1_4rows_mla_za_impl ( const float *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const float *  weights,
const float *  bias,
float **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
float  act_min,
float  act_max 
)

◆ sme2_fp32_planar_5x5_s2_4rows_mla_za_impl()

void arm_conv::depthwise::sme2_fp32_planar_5x5_s2_4rows_mla_za_impl ( const float *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const float *  weights,
const float *  bias,
float **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
float  act_min,
float  act_max 
)

◆ sme2_fp32bf16fp32_planar_3x3_s1_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_fp32bf16fp32_planar_3x3_s1_4rows_dot_za_impl ( const float *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const float *  weights,
const float *  bias,
float **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
float  act_min,
float  act_max 
)

◆ sme2_fp32bf16fp32_planar_3x3_s2_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_fp32bf16fp32_planar_3x3_s2_4rows_dot_za_impl ( const float *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const float *  weights,
const float *  bias,
float **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
float  act_min,
float  act_max 
)

◆ sme2_fp32bf16fp32_planar_5x5_s1_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_fp32bf16fp32_planar_5x5_s1_4rows_dot_za_impl ( const float *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const float *  weights,
const float *  bias,
float **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
float  act_min,
float  act_max 
)

◆ sme2_fp32bf16fp32_planar_5x5_s2_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_fp32bf16fp32_planar_5x5_s2_4rows_dot_za_impl ( const float *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const float *  weights,
const float *  bias,
float **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
float  act_min,
float  act_max 
)

◆ sme2_s8q_planar_3x3_s1_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_s8q_planar_3x3_s1_4rows_dot_za_impl ( const int8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const int8_t *  weights,
int8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 &  qp 
)

◆ sme2_s8q_planar_3x3_s2_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_s8q_planar_3x3_s2_4rows_dot_za_impl ( const int8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const int8_t *  weights,
int8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 &  qp 
)

◆ sme2_s8q_planar_5x5_s1_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_s8q_planar_5x5_s1_4rows_dot_za_impl ( const int8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const int8_t *  weights,
int8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 qp 
)

◆ sme2_s8q_planar_5x5_s2_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_s8q_planar_5x5_s2_4rows_dot_za_impl ( const int8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const int8_t *  weights,
int8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 qp 
)

◆ sme2_u8q_planar_3x3_s1_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_u8q_planar_3x3_s1_4rows_dot_za_impl ( const uint8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const uint8_t *  weights,
uint8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 qp 
)

◆ sme2_u8q_planar_3x3_s2_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_u8q_planar_3x3_s2_4rows_dot_za_impl ( const uint8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const uint8_t *  weights,
uint8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 qp 
)

◆ sme2_u8q_planar_5x5_s1_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_u8q_planar_5x5_s1_4rows_dot_za_impl ( const uint8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const uint8_t *  weights,
uint8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 qp 
)

◆ sme2_u8q_planar_5x5_s2_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_u8q_planar_5x5_s2_4rows_dot_za_impl ( const uint8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const uint8_t *  weights,
uint8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 qp 
)

◆ sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za_impl ( const uint8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const int8_t *  weights,
uint8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 qp 
)

◆ sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za_impl ( const uint8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const int8_t *  weights,
uint8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 qp 
)

◆ sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za_impl ( const uint8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const int8_t *  weights,
uint8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 qp 
)

◆ sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za_impl()

void arm_conv::depthwise::sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za_impl ( const uint8_t *  inptr,
size_t  ld_in_row,
size_t  ld_in_col,
size_t  ld_in_vl,
unsigned int  pad_top,
unsigned int  valid_input_rows,
unsigned int  pad_left,
unsigned int  valid_input_cols,
const int8_t *  weights,
uint8_t **  outptrs,
const size_t *  outlds,
const size_t *  outvllds,
unsigned int  output_cols,
unsigned int  start_channel,
unsigned int  valid_channels,
const arm_gemm::Requantize32 qp 
)
GemmTuner.args: args (Definition: GemmTuner.py:679)
arm_gemm::iceildiv: T iceildiv(const T a, const T b) (Definition: utils.hpp:65)