31 #if defined(__aarch64__) && defined(__ARM_FP16_ARGS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
36 void a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_indirect_impl(
const __fp16 *
const *
const input_ptrs, __fp16 *
const *
const outptrs,
const void *params,
unsigned int n_channels,
const __fp16
activation_min,
const __fp16
activation_max);
37 void a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_direct_impl(
const unsigned int n_tile_rows,
const unsigned int n_tile_cols,
const __fp16 *inptr, int64_t ld_input_row, int64_t ld_input_col, __fp16 *outptr, int64_t ld_output_row, int64_t ld_output_col,
const void *params,
unsigned int n_channels,
const __fp16
activation_min,
const __fp16
activation_max);
39 class a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst :
public DepthwiseDepthfirstStrategy<__fp16, __fp16, __fp16, __fp16>
42 using Parent = DepthwiseDepthfirstStrategy<__fp16, __fp16, __fp16, __fp16>;
43 Parent::IndirectKernelType m_indirect_kernel = a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_indirect_impl;
44 Parent::DirectKernelType m_direct_kernel = a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst_direct_impl;
47 using return_type = __fp16;
50 constexpr
static unsigned int kernel_rows = 3;
51 constexpr
static unsigned int kernel_cols = 3;
53 constexpr
static unsigned int stride_rows = 2;
54 constexpr
static unsigned int stride_cols = 2;
56 constexpr
static unsigned int output_rows = 2;
57 constexpr
static unsigned int output_cols = 2;
59 a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(
const CPUInfo *)
60 : Parent(output_rows, output_cols, kernel_rows, kernel_cols, stride_rows, stride_cols) {}
64 Parent::IndirectKernelType get_indirect_kernel()
const override {
return m_indirect_kernel; }
65 Parent::DirectKernelType get_direct_kernel()
const override {
return m_direct_kernel; }
71 #endif // defined(__aarch64__) && defined(__ARM_FP16_ARGS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)