Compute Library
 21.08
DepthwiseConvolutionLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
26 #include "ConvolutionLayer.h"
27 #include "Utils.h"
28 
32 
34 
35 namespace arm_compute
36 {
37 namespace test
38 {
39 namespace validation
40 {
41 namespace reference
42 {
43 namespace
44 {
45 /** Perform a depthwise convolution for floating-point types
46  *
47  * - Three dimensions tensors
48  * - Third dimension is number of channels
49  * - Depths of input tensor and filter are equal
50  * - Padding, stride and output shape "match"
51  *
 * Reference implementation: every input channel z yields depth_multiplier output
 * channels, and output channel (z * depth_multiplier + m) is computed with the filter
 * plane of the same index. Whatever src holds beyond its x, y and z dimensions is
 * iterated as batches. Results are written to the destination through a running linear
 * index, in loop order: batch, input channel, multiplier, output row, output column.
 *
 * @param[in] src              Input tensor; its x/y/z sizes give width, height and depth.
 * @param[in] weights          Filter tensor; read linearly from weights.data(), one
 *                             (filter_width * filter_height) plane per output channel.
 * @param[in] biases           Bias tensor, indexed by output channel.
 * @param[in] dst_shape        Shape of the returned tensor; its first two dimensions set
 *                             how many output columns and rows are produced.
 * @param[in] conv_info        Supplies the left/top padding and the x/y strides.
 * @param[in] depth_multiplier Number of output channels generated per input channel.
 * @param[in] dilation         Step between consecutive filter taps along x and y.
 * @param[in] out_quant_info   Not used by the floating-point path.
 *
 * @return Tensor of shape dst_shape with the data type of src.
52  */
53 template <typename T>
54 SimpleTensor<T> depthwise_convolution_fp(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
55  unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
56 {
57  ARM_COMPUTE_UNUSED(out_quant_info);
58 
59  SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
60 
61  // Gather the filter and input geometry used by the reference loops
62  const int filter_width = weights.shape().x();
63  const int filter_height = weights.shape().y();
64  const int filter_plane = filter_width * filter_height; // number of weights per output channel
65  const int input_width = src.shape().x();
66  const int input_height = src.shape().y();
67  const int input_depth = src.shape().z();
68  const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); // everything beyond x, y, z counts as batches
69 
70  const int pad_left = conv_info.pad_left();
71  const int pad_top = conv_info.pad_top();
72 
73  const float patch_width = (filter_width + (dilation.x() - 1) * (filter_width - 1)); // extent of the dilated filter along x; kept as float so the half can be rounded both ways below
74  const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1)); // extent of the dilated filter along y
75 
76  const int patch_half_width_floor = patch_width / 2; // float result truncated by the conversion to int
77  const int patch_half_height_floor = patch_height / 2;
78 
79  const auto patch_half_width_ceil = static_cast<int>(std::ceil(patch_width / 2)); // taps span [centre - floor, centre + ceil)
80  const auto patch_half_height_ceil = static_cast<int>(std::ceil(patch_height / 2));
81 
82  const int minimum_x = -pad_left + patch_half_width_floor; // patch centre of the first output column: its leftmost tap sits at x = -pad_left
83  const int minimum_y = -pad_top + patch_half_height_floor; // patch centre of the first output row: its topmost tap sits at y = -pad_top
84  const int maximum_x = (conv_info.stride().first * (dst_shape[0] - 1)); // distance from minimum_x to the patch centre of the last output column
85  const int maximum_y = (conv_info.stride().second * (dst_shape[1] - 1)); // distance from minimum_y to the patch centre of the last output row
86 
87  const T border_value(0); // handed to tensor_elem_at as the constant border value
88 
89  int out_pos = 0; // running linear write index into dst
90  for(int r = 0; r < num_batches; ++r)
91  {
92  for(int z = 0; z < input_depth; ++z)
93  {
94  for(unsigned int m = 0; m < depth_multiplier; ++m)
95  {
96  const int out_z = z * depth_multiplier + m; // output channel produced from input channel z
97 
98  for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
99  {
100  for(int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
101  {
102  Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
103  size_t filter_offset = filter_plane * out_z; // first weight of this output channel's filter plane
104 
105  T val(0);
106  for(int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
107  {
108  for(int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
109  {
110  coords.set(0, i);
111  coords.set(1, j);
112  val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value); // NOTE(review): presumably yields border_value for coordinates outside src - confirm in Utils.h
113  ++filter_offset; // weights are consumed in the same row-by-row order as the taps
114  }
115  }
116 
117  dst[out_pos++] = saturate_cast<T>(val + *static_cast<const T *>(biases(Coordinates(out_z)))); // add the per-output-channel bias, then saturate to T
118  }
119  }
120  }
121  }
122  }
123 
124  return dst;
125 }
126 
127 /** Perform a quantized depthwise convolution
128  *
129  * - Three dimensions tensors
130  * - Third dimension is number of channels
131  * - Depths of input tensor and filter are equal
132  * - Padding, stride and output shape "match"
133  * - QASYMM8/QASYMM8_SIGNED input, output
134  * - QASYMM8/QASYMM8_SIGNED or QSYMM8_PER_CHANNEL filter
135  *
 * Reference implementation: products of offset-corrected input and weight values are
 * accumulated in int32_t, the bias is added, and the sum is requantized with a
 * fixed-point multiplier derived from (input_scale * weights_scale / output_scale).
 * Every input channel z yields depth_multiplier output channels; whatever src holds
 * beyond its x, y and z dimensions is iterated as batches. Results are written through
 * a running linear index, in loop order: batch, input channel, multiplier, row, column.
 *
 * @tparam T  Element type of src and of the returned tensor.
 * @tparam TW Element type of the weights.
 * @tparam TB Not referenced in the body; biases are always read as int32_t.
 *
 * @param[in] src              Input tensor; its uniform quantization info gives offset and scale.
 * @param[in] weights          Filter tensor; read linearly from weights.data(), one
 *                             (filter_width * filter_height) plane per output channel.
 * @param[in] biases           int32_t bias tensor, indexed by output channel.
 * @param[in] dst_shape        Shape of the returned tensor; its first two dimensions set
 *                             how many output columns and rows are produced.
 * @param[in] conv_info        Supplies the left/top padding and the x/y strides.
 * @param[in] depth_multiplier Number of output channels generated per input channel.
 * @param[in] dilation         Step between consecutive filter taps along x and y.
 * @param[in] out_quant_info   Output quantization info; when its uniform() info is empty
 *                             the quantization info of src is used instead.
 *
 * @return Tensor of shape dst_shape with the data type of src and the chosen output
 *         quantization info.
136  */
137 template <typename T, typename TW, typename TB>
138 SimpleTensor<T> depthwise_convolution_quantized(const SimpleTensor<T> &src, const SimpleTensor<TW> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
139  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
140 {
141  // if no explicit quantization has been set, use the same as src
142  const QuantizationInfo &dst_qinfo = out_quant_info.uniform().empty() ? src.quantization_info() : out_quant_info;
143  SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, dst_qinfo };
144 
145  // Collect the quantization parameters of input, weights and output
146  const int input_offset = -src.quantization_info().uniform().offset; // negated so it can simply be added to raw input values
147  const float input_scale = src.quantization_info().uniform().scale;
148  const int weights_offset = -weights.quantization_info().uniform().offset; // negated so it can simply be added to raw weight values
149  const int output_offset = dst_qinfo.uniform().offset;
150  const float output_scale = dst_qinfo.uniform().scale;
151 
152  const std::vector<float> weights_scale_vec = weights.quantization_info().scale(); // indexed by output channel when per-channel, otherwise only element 0 is read
153 
154  // Gather the filter and input geometry used by the reference loops
155  const int filter_width = weights.shape().x();
156  const int filter_height = weights.shape().y();
157  const int filter_plane = filter_width * filter_height; // number of weights per output channel
158  const int input_width = src.shape().x();
159  const int input_height = src.shape().y();
160  const int input_depth = src.shape().z();
161  const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); // everything beyond x, y, z counts as batches
162 
163  const int pad_left = conv_info.pad_left();
164  const int pad_top = conv_info.pad_top();
165 
166  const float patch_width = (filter_width + (dilation.x() - 1) * (filter_width - 1)); // extent of the dilated filter along x; kept as float so the half can be rounded both ways below
167  const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1)); // extent of the dilated filter along y
168 
169  const int patch_half_width_floor = patch_width / 2; // float result truncated by the conversion to int
170  const int patch_half_height_floor = patch_height / 2;
171 
172  const auto patch_half_width_ceil = static_cast<int>(std::ceil(patch_width / 2)); // taps span [centre - floor, centre + ceil)
173  const auto patch_half_height_ceil = static_cast<int>(std::ceil(patch_height / 2));
174 
175  const int minimum_x = -pad_left + patch_half_width_floor; // patch centre of the first output column: its leftmost tap sits at x = -pad_left
176  const int minimum_y = -pad_top + patch_half_height_floor; // patch centre of the first output row: its topmost tap sits at y = -pad_top
177  const int maximum_x = (conv_info.stride().first * (dst_shape[0] - 1)); // distance from minimum_x to the patch centre of the last output column
178  const int maximum_y = (conv_info.stride().second * (dst_shape[1] - 1)); // distance from minimum_y to the patch centre of the last output row
179 
180  const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights.data_type());
181 
182  const int min = std::numeric_limits<T>::lowest(); // lower bound of the output range passed to the requantization step
183  const int max = std::numeric_limits<T>::max(); // upper bound of the output range passed to the requantization step
184 
185  int out_pos = 0; // running linear write index into dst
186  for(int r = 0; r < num_batches; ++r)
187  {
188  for(int z = 0; z < input_depth; ++z)
189  {
190  for(unsigned int m = 0; m < depth_multiplier; ++m)
191  {
192  const int out_z = z * depth_multiplier + m; // output channel produced from input channel z
193  const int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(out_z)));
194 
195  int output_multiplier = 0;
196  int output_shift = 0;
197  const float weights_scale = (is_quantized_per_channel) ? weights_scale_vec[out_z] : weights_scale_vec[0];
198  const float multiplier = input_scale * weights_scale / output_scale; // real-valued rescale factor from accumulator to output
199  arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); // fills the fixed-point multiplier/shift pair; the returned Status is not inspected
200 
201  for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
202  {
203  for(int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
204  {
205  Coordinates coords(x, y, z, r);
206  int filter_offset = filter_plane * out_z; // first weight of this output channel's filter plane
207 
208  int32_t val = 0;
209  for(int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
210  {
211  for(int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
212  {
213  coords.set(0, i);
214  coords.set(1, j);
215  const auto in_val = tensor_elem_at<T>(src, coords, BorderMode::CONSTANT, -input_offset); // border constant is the src quantization offset, so (in_val + input_offset) is 0 for it; NOTE(review): presumably returned for coordinates outside src - confirm in Utils.h
216  const TW w_val = *(weights.data() + filter_offset);
217  val += (in_val + input_offset) * (w_val + weights_offset); // remove both offsets before multiplying
218  ++filter_offset; // weights are consumed in the same row-by-row order as the taps
219  }
220  }
221  val += bias_val;
222  // Quantize down: apply the fixed-point multiplier/shift and output offset with the [min, max] range
223  val = quantize_down_scale_by_fixedpoint(val, output_multiplier, output_shift, output_offset, min, max);
224 
225  // Store the result
226  dst[out_pos++] = val;
227  }
228  }
229  }
230  }
231  }
232 
233  return dst;
234 }
235 } // namespace
236 
237 template <> // Explicit specialization. NOTE(review): listing line 238 (the start of the signature) is missing from this dump, so the element type is not visible here.
239  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
240 {
241  return depthwise_convolution_fp(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info); // forwards every argument unchanged to the floating-point reference
242 }
243 
244 template <> // Explicit specialization. NOTE(review): listing line 245 (the start of the signature) is missing from this dump, so the element type is not visible here.
246  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
247 {
248  return depthwise_convolution_fp(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info); // forwards every argument unchanged to the floating-point reference
249 }
250 
251 template <> // Explicit specialization. NOTE(review): listing line 252 (the start of the signature) is missing from this dump.
253  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
254 {
255  return depthwise_convolution_quantized<uint8_t, uint8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info); // quantized reference with T = uint8_t, TW = uint8_t, TB = int32_t
256 }
257 
258 template <> // Explicit specialization. NOTE(review): listing line 259 (the start of the signature) is missing from this dump.
260  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
261 {
262  return depthwise_convolution_quantized<uint8_t, int8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info); // quantized reference with T = uint8_t, TW = int8_t, TB = int32_t
263 }
264 
265 template <> // Explicit specialization. NOTE(review): listing line 266 (the start of the signature) is missing from this dump.
267  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
268 {
269  return depthwise_convolution_quantized<int8_t, int8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info); // quantized reference with T = int8_t, TW = int8_t, TB = int32_t
270 }
271 } // namespace reference
272 } // namespace validation
273 } // namespace test
274 } // namespace arm_compute
T tensor_elem_at(const SimpleTensor< T > &src, Coordinates coord, BorderMode border_mode, T constant_border_value)
Definition: Utils.h:63
Shape of a tensor.
Definition: TensorShape.h:39
const size_t input_depth
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2021 Arm Limited.
T x() const
Alias to access the size of the first dimension.
Definition: Dimensions.h:87
Quantization information.
const size_t input_width
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
Definition: Utils.h:1058
Padding and stride information class.
Definition: Types.h:647
int32_t quantize_down_scale_by_fixedpoint(int32_t val, int32_t result_mult_int, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Quantize down the input value in range [min, max].
Simple tensor object that stores elements in a consecutive chunk of memory.
Definition: SimpleTensor.h:58
SimpleTensor< float > depthwise_convolution(const SimpleTensor< float > &src, const SimpleTensor< float > &weights, const SimpleTensor< float > &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
Class for specifying the size of an image or rectangle.
Definition: Size2D.h:34
const size_t input_height
T saturate_cast(T val)
Saturate a value of type T against the numeric limits of type U.
Definition: Utils.h:312