Compute Library
 23.08
arm_conv::addressing Namespace Reference

Functions

void fill_pointer_array (size_t element_size, void **dest_raw, const unsigned int array_rows, const unsigned int array_cols, void *base_ptr_raw, size_t ld_row, size_t ld_col, void *pad_buffer_raw, const unsigned int pad_top, const unsigned int valid_rows, const unsigned int pad_left, const unsigned int valid_cols)
 
void fill_pointer_array_generic_kernel (const size_t element_size, void **dest_raw, const unsigned int output_rows, const unsigned int output_cols, const unsigned int kernel_rows, const unsigned int kernel_cols, const unsigned int stride_rows, const unsigned int stride_cols, void *base_ptr_raw, size_t ld_row, size_t ld_col, void *pad_buffer_raw, const unsigned int pad_top, const unsigned int valid_rows, const unsigned int pad_left, const unsigned int valid_cols)
 
void fill_nchw_patch_array (size_t element_size, const void **dest_row_pointers_raw, void *dest_patch_raw, const unsigned int patch_rows, unsigned int patch_cols, const void *src_ptr_raw, size_t ld_row, size_t ld_col, const void *pad_row, const unsigned int pad_top, const unsigned int valid_rows, const unsigned int pad_left, const unsigned int valid_cols)
 
void fill_patch_array_generic_kernel (size_t element_size, const void **dest_pointers_raw, void *patch_raw, const unsigned int output_rows, const unsigned int output_cols, const unsigned int kernel_rows, const unsigned int kernel_cols, const unsigned int stride_rows, const unsigned int stride_cols, const void *src_ptr_raw, size_t ld_row, size_t ld_col, const void *pad_row, const unsigned int pad_top, const unsigned int valid_rows, const unsigned int pad_left, const unsigned int valid_cols)
 

Function Documentation

◆ fill_nchw_patch_array()

void fill_nchw_patch_array ( size_t  element_size,
const void **  dest_row_pointers_raw,
void *  dest_patch_raw,
const unsigned int  patch_rows,
unsigned int  patch_cols,
const void *  src_ptr_raw,
size_t  ld_row,
size_t  ld_col,
const void *  pad_row,
const unsigned int  pad_top,
const unsigned int  valid_rows,
const unsigned int  pad_left,
const unsigned int  valid_cols 
)

Definition at line 171 of file addressing.cpp.

// Fills dest_row_pointers_raw with one pointer per patch row. Rows in the top/bottom padding point at
// pad_row; valid rows point either straight into the source tensor or at a row assembled in dest_patch_raw.
// element_size is used as a byte count (it is passed to memcpy), and ld_row / ld_col are scaled by it.
181 {
182  // Reinterpret the raw pointers as char pointers so that the pointer arithmetic below is in bytes
183  auto row_pointers = reinterpret_cast<const char **>(dest_row_pointers_raw);
184  auto dest_patch = reinterpret_cast<char *>(dest_patch_raw);
185  auto src = reinterpret_cast<const char *>(src_ptr_raw);
186  ld_row *= element_size; // From here on ld_row is a byte offset between rows
187  ld_col *= element_size; // From here on ld_col is a byte offset between columns
188 
189  // Round up the patch columns to be a full quad (presumably a multiple of 16 bytes' worth of elements; arm_gemm::roundup is defined elsewhere)
190  patch_cols = arm_gemm::roundup<unsigned int>(patch_cols, 16 / element_size);
191 
192  const auto last_valid_row = std::min(pad_top + valid_rows, patch_rows); // One past the last row holding source data
193  const auto last_valid_col = std::min(pad_left + valid_cols, patch_cols); // One past the last column holding source data
194 
195  // Construct the patch and row pointer array together
196  unsigned int i = 0;
197  for (; i < pad_top; i++) // NOTE(review): bounded by pad_top only, not patch_rows - assumes pad_top <= patch_rows; confirm with callers
198  {
199  // Insert pointers into the padding row
200  *(row_pointers++) = reinterpret_cast<const char *>(pad_row);
201  }
202  for (; i < last_valid_row; i++)
203  {
204  // Get a copy of the pointer for this row
205  auto colptr = src;
206  src += ld_row; // Advance the source to the next row for the following iteration
207 
208  // If the input is already in NCHW format (ld_col == element_size) AND
209  // there is no padding, then we just use a pointer to the source tensor;
210  // otherwise we need to construct a patch and provide a pointer to it.
211  if (ld_col == element_size && pad_left == 0 && last_valid_col == patch_cols)
212  {
213  *(row_pointers++) = colptr;
214  }
215  else
216  {
217  auto patch_col = dest_patch; // Write cursor within the row being assembled
218  *(row_pointers++) = dest_patch;
219  dest_patch += element_size * patch_cols; // Move the patch pointer on to the next row's storage
220 
221  // Construct the patch; fill the entirety with padding and then copy in
222  // the valid elements. This reads patch_cols elements from pad_row.
223  memcpy(patch_col, pad_row, element_size * patch_cols);
224  patch_col += pad_left * element_size; // Move over the left padding
225 
226  if (ld_col == element_size)
227  {
228  // If the input is NCHW then copy across as many columns as we can. NOTE(review): the unsigned subtraction wraps if pad_left > last_valid_col - confirm callers rule this out
229  memcpy(patch_col, colptr, (last_valid_col - pad_left) * element_size);
230  }
231  else
232  {
233  // If the input is NHWC then copy columns across in turn.
234  for (auto j = pad_left; j < last_valid_col; j++)
235  {
236  memcpy(patch_col, colptr, element_size); // Copy the valid element
237  patch_col += element_size; // Progress the patch destination
238  colptr += ld_col; // Progress the patch source
239  }
240  }
241  }
242  }
243  for (; i < patch_rows; i++)
244  {
245  // Rows below the valid region also point at the padding row
246  *(row_pointers++) = reinterpret_cast<const char *>(pad_row);
247  }
248 }

References arm_compute::test::validation::src.

Referenced by PrepareInputSample< false >::execute().

◆ fill_patch_array_generic_kernel()

void fill_patch_array_generic_kernel ( size_t  element_size,
const void **  dest_pointers_raw,
void *  patch_raw,
const unsigned int  output_rows,
const unsigned int  output_cols,
const unsigned int  kernel_rows,
const unsigned int  kernel_cols,
const unsigned int  stride_rows,
const unsigned int  stride_cols,
const void *  src_ptr_raw,
size_t  ld_row,
size_t  ld_col,
const void *  pad_row,
const unsigned int  pad_top,
const unsigned int  valid_rows,
const unsigned int  pad_left,
const unsigned int  valid_cols 
)

Definition at line 259 of file addressing.cpp.

// For every kernel point (ki, kj), in that nesting order, writes output_rows pointers into dest_pointers_raw.
// Each pointer is either pad_row (input row falls in the padding) or a row freshly assembled in patch_raw
// that samples the source row at columns kj, kj + stride_cols, kj + 2 * stride_cols, ...
271 {
272  auto dest = reinterpret_cast<const char **>(dest_pointers_raw);
273  auto patch = reinterpret_cast<char *>(patch_raw);
274  auto src_ptr = reinterpret_cast<const char *>(src_ptr_raw);
275  ld_row *= element_size; // From here on ld_row is a byte offset between rows
276  ld_col *= element_size; // From here on ld_col is a byte offset between columns
277 
278  // Round up the patch columns to a multiple of quad-length (arm_gemm::roundup is defined elsewhere)
279  const auto patch_cols = arm_gemm::roundup<unsigned int>(output_cols, 16 / element_size);
280 
281  const auto input_rows = kernel_rows + (output_rows - 1) * stride_rows; // Input rows touched by the whole output tile
282  const auto last_valid_row = std::min(pad_top + valid_rows, input_rows);
283 
284  const auto input_cols = kernel_cols + (output_cols - 1) * stride_cols; // Input columns touched by the whole output tile
285  const auto last_valid_col = std::min(pad_left + valid_cols, input_cols);
286 
287  for (auto ki = 0u; ki < kernel_rows; ki++)
288  {
289  for (auto kj = 0u; kj < kernel_cols; kj++)
290  {
291  auto oi = 0u, ii = ki; // oi: output row index; ii: matching input row index (including padding)
292  for (; oi < output_rows && ii < pad_top; oi++, ii += stride_rows)
293  {
294  // Insert a pointer to the padding row
295  *(dest++) = reinterpret_cast<const char *>(pad_row);
296  }
297  for (; oi < output_rows && ii < last_valid_row; oi++, ii += stride_rows)
298  {
299  auto rowptr = src_ptr + (ii - pad_top) * ld_row; // Start of the source row backing input row ii
300 
301  // Construct a sample of the input here
302  auto patch_pos = patch;
303  *(dest++) = patch;
304  patch += patch_cols * element_size; // Reserve this row's storage and move on to the next
305 
306  // Fill with padding (reads patch_cols elements from pad_row)
307  memcpy(patch_pos, pad_row, patch_cols * element_size);
308 
309  // Fill in the valid elements. NOTE(review): both loops below are bounded by the rounded-up patch_cols rather than output_cols - confirm this is intended
310  auto oj = 0u, ij = kj; // oj: output column index; ij: matching input column index (including padding)
311  for (; oj < patch_cols && ij < pad_left; oj++, ij += stride_cols)
312  {
313  // Do nothing for padding; just step over the already padded element
314  patch_pos += element_size;
315  }
316  for (; oj < patch_cols && ij < last_valid_col; oj++, ij += stride_cols)
317  {
318  // Copy from the source tensor
319  memcpy(patch_pos, rowptr + (ij - pad_left)*ld_col, element_size);
320  patch_pos += element_size;
321  }
322  // No action required for right-hand padding
323  }
324  for (; oi < output_rows; oi++)
325  {
326  *(dest++) = reinterpret_cast<const char *>(pad_row); // Remaining output rows fall below the valid region
327  }
328  }
329  }
330 }

References GemmTuner::dest.

Referenced by PrepareInputSample< true >::execute().

◆ fill_pointer_array()

void fill_pointer_array ( size_t  element_size,
void **  dest_raw,
const unsigned int  array_rows,
const unsigned int  array_cols,
void *  base_ptr_raw,
size_t  ld_row,
size_t  ld_col,
void *  pad_buffer_raw,
const unsigned int  pad_top,
const unsigned int  valid_rows,
const unsigned int  pad_left,
const unsigned int  valid_cols 
)

Definition at line 33 of file addressing.cpp.

// Writes one pointer per (row, column) position of an array_rows x array_cols grid into dest_raw, row by row.
// Positions inside the valid region get base_ptr offset by the scaled row/column strides; all other
// positions get pad_buffer_raw.
41 {
42  auto dest = reinterpret_cast<char **>(dest_raw);
43  auto base_ptr = reinterpret_cast<char *>(base_ptr_raw);
44  auto pad_buffer = reinterpret_cast<char *>(pad_buffer_raw);
45  ld_row *= element_size; // Scale the row stride by element_size; it is added to a char pointer below
46  ld_col *= element_size; // Scale the column stride by element_size; it is added to a char pointer below
47 
48  const auto last_valid_row = std::min(pad_top + valid_rows, array_rows); // One past the last row holding real data
49  const auto last_valid_col = std::min(pad_left + valid_cols, array_cols); // One past the last column holding real data
50 
51  unsigned int i = 0;
52  for (; i < pad_top; i++) // Top padding rows. NOTE(review): bounded by pad_top only - assumes pad_top <= array_rows; confirm with callers
53  {
54  for (unsigned int j = 0; j < array_cols; j++)
55  {
56  *(dest++) = pad_buffer;
57  }
58  }
59  for (; i < last_valid_row; i++) // Rows that contain at least some real data
60  {
61  unsigned int j = 0;
62  auto colptr = base_ptr; // Walks along the current source row
63  base_ptr += ld_row; // Advance to the next source row for the following iteration
64 
65  for (; j < pad_left; j++) // Left padding. NOTE(review): bounded by pad_left only - assumes pad_left <= array_cols; confirm with callers
66  {
67  *(dest++) = pad_buffer;
68  }
69  for (; j < last_valid_col; j++) // Valid columns point into the source
70  {
71  *(dest++) = colptr;
72  colptr += ld_col;
73  }
74  for (; j < array_cols; j++) // Right padding
75  {
76  *(dest++) = pad_buffer;
77  }
78  }
79  for (; i < array_rows; i++) // Bottom padding rows
80  {
81  for (unsigned int j = 0; j < array_cols; j++)
82  {
83  *(dest++) = pad_buffer;
84  }
85  }
86 }

References GemmTuner::dest.

Referenced by DepthwiseDepthfirstMultiplier< TInput, TWeight, TOutput, TAccum, is_generic, OutputStage >::compute_tile_padded().

◆ fill_pointer_array_generic_kernel()

void fill_pointer_array_generic_kernel ( const size_t  element_size,
void **  dest_raw,
const unsigned int  output_rows,
const unsigned int  output_cols,
const unsigned int  kernel_rows,
const unsigned int  kernel_cols,
const unsigned int  stride_rows,
const unsigned int  stride_cols,
void *  base_ptr_raw,
size_t  ld_row,
size_t  ld_col,
void *  pad_buffer_raw,
const unsigned int  pad_top,
const unsigned int  valid_rows,
const unsigned int  pad_left,
const unsigned int  valid_cols 
)

Definition at line 89 of file addressing.cpp.

// For every output point (oi, oj) writes kernel_rows * kernel_cols pointers into dest_raw, one per kernel point.
// Successive kernel points of the same output point are output_rows * output_cols entries apart, so the
// pointer for kernel point k of output point (oi, oj) lands at dest[k * output_rows * output_cols + oi * output_cols + oj].
// Input positions outside the valid region get pad_buffer_raw.
100 {
101  auto dest = reinterpret_cast<char **>(dest_raw);
102  auto base_ptr = reinterpret_cast<char *>(base_ptr_raw);
103  auto pad_buffer = reinterpret_cast<char *>(pad_buffer_raw);
104  ld_row *= element_size; // Scale the row stride by element_size; it is added to a char pointer below
105  ld_col *= element_size; // Scale the column stride by element_size; it is added to a char pointer below
106 
107  const auto last_valid_row = pad_top + valid_rows; // Not clamped here; the loops below are also bounded by kernel_rows
108  const auto last_valid_col = pad_left + valid_cols; // Not clamped here; the loops below are also bounded by kernel_cols
109  const auto point_stride = output_rows * output_cols; // Distance in dest between consecutive kernel points
110 
111  // Iterate over the output points, after every point increment the pointer
112  // into the address array.
113  for (unsigned int oi = 0; oi < output_rows; oi++)
114  {
115  for (unsigned int oj = 0; oj < output_cols; oj++)
116  {
117  auto point_dest = dest; // Slot for kernel point 0 of this output point
118  dest++;
119 
120  // Iterate over kernel points and fill in the pointer array.
121  unsigned int ki = 0, ii = oi*stride_rows; // ki: kernel row; ii: matching input row (including padding)
122  for (; ii < pad_top && ki < kernel_rows; ii++, ki++)
123  {
124  // Kernel rows that fall in the top padding: fill with padding
125  for (unsigned int j = 0; j < kernel_cols; j++)
126  {
127  *point_dest = pad_buffer;
128  point_dest += point_stride;
129  }
130  }
131  for (; ii < last_valid_row && ki < kernel_rows; ii++, ki++)
132  {
133  unsigned int kj = 0, ij = oj*stride_cols; // kj: kernel column; ij: matching input column (including padding)
134  for (; ij < pad_left && kj < kernel_cols; ij++, kj++)
135  {
136  // Left padding
137  *point_dest = pad_buffer;
138  point_dest += point_stride;
139  }
140  for (; ij < last_valid_col && kj < kernel_cols; ij++, kj++)
141  {
142  *point_dest = base_ptr + (ii - pad_top)*ld_row + (ij - pad_left)*ld_col; // Address of the source element, with the padding offsets removed
143  point_dest += point_stride;
144  }
145  for (; kj < kernel_cols; kj++)
146  {
147  // Right padding
148  *point_dest = pad_buffer;
149  point_dest += point_stride;
150  }
151  }
152  for (; ki < kernel_rows; ki++)
153  {
154  // Kernel rows that fall below the valid region: fill with padding
155  for (unsigned int j = 0; j < kernel_cols; j++)
156  {
157  *point_dest = pad_buffer;
158  point_dest += point_stride;
159  }
160  }
161  }
162  }
163 }

References GemmTuner::dest.

arm_compute::test::validation::src
SimpleTensor< float > src
Definition: DFT.cpp:155
GemmTuner.dest
dest
Definition: GemmTuner.py:644