Compute Library
 22.08
DepthwiseDepthfirstQuantized< strategy > Class Template Reference

#include <depthwise_depthfirst_quantized.hpp>

Collaboration diagram for DepthwiseDepthfirstQuantized< strategy >:
[legend]

Public Member Functions

 DepthwiseDepthfirstQuantized (const DepthwiseArgs &args, const arm_gemm::Requantize32 &qp)
 
 DepthwiseDepthfirstQuantized (DepthwiseDepthfirstQuantized &)=delete
 
DepthwiseDepthfirstQuantized & operator= (DepthwiseDepthfirstQuantized &)=delete
 
size_t get_storage_size (void) const override
 
void pack_parameters (void *buffer, const void *const bias, const void *weights, size_t ld_weight_col, size_t ld_weight_row) override
 
size_t get_working_size (const unsigned int n_threads, const unsigned int n_channels) const override
 
void execute (const unsigned int batches, const unsigned int input_height, const unsigned int input_width, const unsigned int input_channels, const PaddingValues &padding, const void *const _input, const size_t ld_input_col, const size_t ld_input_row, const size_t ld_input_batch, const void *const parameters, const unsigned int output_height, const unsigned int output_width, void *const _output, const size_t ld_output_col, const size_t ld_output_row, const size_t ld_output_batch, void *_working_space, const unsigned int thread_id, const unsigned int n_threads) const override
 

Detailed Description

template<class strategy>
class arm_conv::depthwise::DepthwiseDepthfirstQuantized< strategy >

Definition at line 109 of file depthwise_depthfirst_quantized.hpp.

Constructor & Destructor Documentation

◆ DepthwiseDepthfirstQuantized() [1/2]

DepthwiseDepthfirstQuantized ( const DepthwiseArgs &  args,
const arm_gemm::Requantize32 &  qp 
)
inline

Definition at line 166 of file depthwise_depthfirst_quantized.hpp.

References DepthwiseDepthfirstQuantized< strategy >::operator=().

167  : DepthwiseCommon<TInput, TWeight, TOutput>(args), m_qp(qp)
168  {
169  }

◆ DepthwiseDepthfirstQuantized() [2/2]

Member Function Documentation

◆ execute()

void execute ( const unsigned int  batches,
const unsigned int  input_height,
const unsigned int  input_width,
const unsigned int  input_channels,
const PaddingValues &  padding,
const void *const  _input,
const size_t  ld_input_col,
const size_t  ld_input_row,
const size_t  ld_input_batch,
const void *const  parameters,
const unsigned int  output_height,
const unsigned int  output_width,
void *const  _output,
const size_t  ld_output_col,
const size_t  ld_output_row,
const size_t  ld_output_batch,
void *  _working_space,
const unsigned int  thread_id,
const unsigned int  n_threads 
) const
inlineoverride

Definition at line 210 of file depthwise_depthfirst_quantized.hpp.

References Requantize32::a_offset, arm_compute::test::validation::batch, batches, Requantize32::bias, DepthwiseDepthfirstQuantized< strategy >::get_working_size(), arm_gemm::iceildiv(), arm_compute::test::validation::if(), input_buffer, outptr_array, output_buffer, Requantize32::per_channel_muls, Requantize32::per_channel_requant, Requantize32::per_channel_right_shifts, Requantize32::per_layer_mul, Requantize32::per_layer_right_shift, and strategy.

231  {
232  strategy strat(this->m_args.cpu_info);
233 #ifdef CYCLE_PROFILING
234  arm_gemm::profiler prof;
235 #endif
236  // Get a unified API for the kernel function
237  auto kernel = get_unified_kernel<TInput, TWeight, TOutput>(strat.kernel);
238 
239  // Determine what portion of the work to do.
240  const unsigned int n_rows_per_thread = arm_gemm::iceildiv(output_height, n_threads);
241  const int start_out_height = std::min(thread_id * n_rows_per_thread, output_height);
242  const int end_out_height = std::min(start_out_height + n_rows_per_thread, output_height);
243 
244  // Cast input and output pointers into the right types
245  const TInput *const inptr = static_cast<const TInput *>(_input);
246  TOutput *const outptr = static_cast<TOutput *>(_output);
247 
248  // Create an array for the input pointers
249  const TInput * _inptr_array[strategy::input_rows * strategy::input_cols];
250  const TInput **const inptr_array = _inptr_array;
251 
252  // Create an array for the output pointers
253  TOutput * _outptr_array[strategy::output_rows * strategy::output_cols];
254  TOutput **const outptr_array = _outptr_array;
255 
256  // Allocate portions of the working space
257  uint8_t *working_space = static_cast<uint8_t *>(_working_space) + get_working_size(thread_id, input_channels);
258 
259  TOutput *const output_buffer = reinterpret_cast<TOutput *>(working_space);
260  working_space += sizeof_output_buffer(input_channels * this->m_args.channel_multiplier);
261 
262  TInput *const input_buffer = reinterpret_cast<TInput *>(working_space);
263  working_space += sizeof_input_buffer(input_channels);
264 
265  const int32_t *const bias_ptr = (m_qp.bias == nullptr) ? reinterpret_cast<int32_t *>(working_space)
266  : m_qp.bias;
267  working_space += sizeof_bias_buffer(input_channels * this->m_args.channel_multiplier);
268 
269  const int32_t *const requant_mul_vec = !m_qp.per_channel_requant ? reinterpret_cast<int32_t *>(working_space)
270  : m_qp.per_channel_muls;
271  working_space += sizeof_requant_mul_buffer(input_channels * this->m_args.channel_multiplier);
272 
273  const int32_t *const requant_shift_vec = !m_qp.per_channel_requant ? reinterpret_cast<int32_t *>(working_space)
274  : m_qp.per_channel_right_shifts;
275 
276  if (strategy_requires_unravelled_bias_and_quant_params<strategy>())
277  {
278  // Initialise the bias buffer
279  if (m_qp.bias == nullptr)
280  {
281  for (unsigned int c = 0; c < input_channels * this->m_args.channel_multiplier; c++)
282  {
283  const_cast<int32_t *>(bias_ptr)[c] = 0;
284  }
285  }
286 
287  // Initialise the requantisation parameters
288  if (!m_qp.per_channel_requant)
289  {
290  for (unsigned int c = 0; c < input_channels * this->m_args.channel_multiplier; c++)
291  {
292  const_cast<int32_t *>(requant_mul_vec)[c] = m_qp.per_layer_mul;
293  const_cast<int32_t *>(requant_shift_vec)[c] = m_qp.per_layer_right_shift;
294  }
295  }
296  }
297 
298  // Initialise the input buffer
299  for (unsigned int c = 0; c < input_channels; c++)
300  {
301  input_buffer[c] = static_cast<TInput>(m_qp.a_offset);
302  }
303 
304  // For each output tile, construct the requisite set of pointers and call
305  // into the kernel.
306  for (unsigned int batch = 0; batch < batches; batch++)
307  {
308  // Get batch pointers
309  const auto inptr_batch = inptr + batch * ld_input_batch;
310  const auto outptr_batch = outptr + batch * ld_output_batch;
311 
312  for (int start_out_i = start_out_height;
313  start_out_i < end_out_height;
314  start_out_i += static_cast<int>(strategy::output_rows))
315  {
316  const int end_out_i = start_out_i + strategy::output_rows;
317  const int start_in_i = start_out_i * strategy::stride_rows - padding.top;
318  const int end_in_i = start_in_i + strategy::input_rows;
319 
320  // Compute top/bottom padding
321  const auto pad_top = static_cast<unsigned int>(-std::min(start_in_i, 0));
322  const auto pad_bottom = static_cast<unsigned int>(-std::min(static_cast<int>(input_height) - end_in_i, 0));
323  const unsigned int valid_output_rows = std::min(
324  end_out_i - start_out_i,
325  static_cast<int>(output_height) - start_out_i
326  );
327 
328  // Fill the input pointer array with padding values
329  for (auto index = 0u; index < strategy::input_rows * strategy::input_cols; index++)
330  {
331  inptr_array[index] = input_buffer;
332  }
333 
334  for (int start_out_j = 0; start_out_j < static_cast<int>(output_width);)
335  {
336  const int start_in_j = start_out_j * strategy::stride_cols - this->m_args.padding.left;
337  const int pad_left = -std::min(0, start_in_j);
338 
339  const int end_out_j = start_out_j + strategy::output_cols;
340  const int end_in_j = start_in_j + strategy::input_cols;
341 
342  const auto pad_right = static_cast<unsigned int>(-std::min(static_cast<int>(input_width) - end_in_j, 0));
343  const unsigned int valid_output_cols = std::min(
344  end_out_j - start_out_j,
345  static_cast<int>(output_width) - start_out_j
346  );
347 
348  // Construct the input pointer array - fill the array with pointers to
349  // the input buffer and then fill in the required values.
350  for (auto i = pad_top; i < strategy::input_rows - pad_bottom; i++)
351  {
352  // Can skip over the left padding because we will have either the
353  // same or less than the previous tile.
354  unsigned int j = pad_left;
355  const TInput *colptr = inptr_batch + (start_in_i + i) * ld_input_row + (start_in_j + j) * ld_input_col;
356  const TInput **ptrs = inptr_array + i * strategy::input_cols + j;
357  for (; j < strategy::input_cols - pad_right; j++)
358  {
359  *(ptrs++) = colptr;
360  colptr += ld_input_col;
361  }
362  for (; j < strategy::input_cols; j++)
363  {
364  *(ptrs++) = input_buffer;
365  }
366  }
367 
368  // Construct the output pointer array.
369  TOutput **outptr_pos = outptr_array;
370  for (auto i = 0u; i < valid_output_rows; i++)
371  {
372  unsigned int j = 0u;
373  TOutput *colptr = outptr_batch + (start_out_i + i) * ld_output_row + start_out_j * ld_output_col;
374  for (; j < valid_output_cols; j++)
375  {
376  *(outptr_pos++) = colptr;
377  colptr += ld_output_col;
378  }
379  for (; j < strategy::output_cols; j++)
380  {
381  *(outptr_pos++) = output_buffer;
382  }
383  }
384  for (auto i = valid_output_rows; i < strategy::output_rows; i++)
385  {
386  for (auto j = 0u; j < strategy::output_cols; j++)
387  {
388  *(outptr_pos++) = output_buffer;
389  }
390  }
391 
392  start_out_j += strategy::output_cols;
393 
394 #ifdef CYCLE_PROFILING
395  // TODO Work number
396  auto p = prof.ScopedProfiler(PROFILE_KERNEL, (unsigned long)(strategy::output_rows * strategy::output_cols * this->m_args.kernel_rows * this->m_args.kernel_cols));
397 #endif
398  kernel(
399  this->m_args.input_channels,
400  inptr_array,
401  reinterpret_cast<const TWeight *>(parameters),
402  bias_ptr, m_qp, requant_mul_vec, requant_shift_vec,
403  outptr_array
404  );
405  }
406  }
407  }
408  }
const int32_t * bias
Definition: arm_gemm.hpp:172
T * input_buffer
T * output_buffer
T iceildiv(const T a, const T b)
Definition: utils.hpp:65
const size_t input_height
Definition: impl.cpp:61
int32_t per_layer_right_shift
Definition: arm_gemm.hpp:179
const size_t input_width
Definition: impl.cpp:62
std::unique_ptr< ParametersLibrary > parameters
Definition: Framework.cpp:46
const StratType * strategy
const int32_t * per_channel_right_shifts
Definition: arm_gemm.hpp:182
size_t get_working_size(const unsigned int n_threads, const unsigned int n_channels) const override
unsigned int batches
const int32_t * per_channel_muls
Definition: arm_gemm.hpp:183
T ** outptr_array

◆ get_storage_size()

size_t get_storage_size ( void  ) const
inlineoverride

Definition at line 174 of file depthwise_depthfirst_quantized.hpp.

175  {
176  return strategy::get_packed_size(this->m_args);
177  }

◆ get_working_size()

size_t get_working_size ( const unsigned int  n_threads,
const unsigned int  n_channels 
) const
inlineoverride

Definition at line 197 of file depthwise_depthfirst_quantized.hpp.

Referenced by DepthwiseDepthfirstQuantized< strategy >::execute().

198  {
199  const unsigned int n_output_channels = n_channels * this->m_args.channel_multiplier;
200  return n_threads * (
201  sizeof_output_buffer(n_output_channels) +
202  sizeof_input_buffer(n_channels) +
203  sizeof_bias_buffer(n_channels) +
204  sizeof_requant_mul_buffer(n_channels) +
205  sizeof_requant_shift_buffer(n_channels)
206  );
207  }

◆ operator=()

◆ pack_parameters()

void pack_parameters ( void *  buffer,
const void *const  bias,
const void *  weights,
size_t  ld_weight_col,
size_t  ld_weight_row 
)
inlineoverride

Definition at line 179 of file depthwise_depthfirst_quantized.hpp.

References Requantize32::bias, bias, and arm_conv::depthwise::interleaves::quantized::pack_parameters().

180  {
181  if (strategy_requires_unravelled_bias_and_quant_params<strategy>())
182  {
183  m_qp.bias = static_cast<const int32_t *>(bias);
184  }
185 
186  get_unified_packer<TWeight>(strategy::pack_parameters)(
187  this->m_args.input_channels,
188  buffer,
189  static_cast<const int32_t *>(bias),
190  reinterpret_cast<const TWeight *>(weights),
191  m_qp,
192  ld_weight_col,
193  ld_weight_row
194  );
195  }
const int32_t * bias
Definition: arm_gemm.hpp:172
void pack_parameters(void *_buffer, const int32_t *biases, const T *weights, size_t ld_weight_col, size_t ld_weight_row, const DepthwiseArgs &args, const arm_gemm::Requantize32 &qp, const arm_gemm::VLType vl_type, const unsigned int accumulator_depth_vl)
const int32_t * bias

The documentation for this class was generated from the following file: