// Reference depthwise convolution for non-quantized element types T.
//
// NOTE(review): this is a partial listing. Each statement is prefixed with its
// line number in the original file and the numbering has gaps, so the braces,
// several declarations (input_width, input_height, input_depth, val, out_pos, z)
// and the innermost accumulation are on lines that are not shown here.
//
// Parameters, as used by the visible lines:
//   src              - input tensor; its total size is divided by
//                      width * height * depth to obtain the batch count.
//   weights          - filter tensor; shape().x() / shape().y() give the filter size.
//   biases           - read at Coordinates(out_z) and added to every output value.
//   dst_shape        - output shape; dimensions 0 and 1 bound the x / y sweeps.
//   conv_info        - supplies pad_left(), pad_top() and the (x, y) strides.
//   depth_multiplier - upper bound of the m loop; combined with z to form out_z.
//   dilation         - step between filter taps along x and y.
//   out_quant_info   - not referenced by any of the visible lines.
54 SimpleTensor<T> depthwise_convolution_fp(
const SimpleTensor<T> &
src,
const SimpleTensor<T> &weights,
const SimpleTensor<T> &biases,
const TensorShape &
dst_shape,
const PadStrideInfo &
conv_info,
55 unsigned int depth_multiplier,
const Size2D &dilation,
const QuantizationInfo &out_quant_info)
// Filter extents and the number of weights in one 2D filter plane.
62 const int filter_width = weights.shape().
x();
63 const int filter_height = weights.shape().y();
64 const int filter_plane = filter_width * filter_height;
// Batch count = total elements / elements per (width x height x depth) volume.
// input_width / input_height / input_depth are declared on lines not shown.
68 const int num_batches = src.shape().total_size() / (input_width * input_height *
input_depth);
70 const int pad_left = conv_info.pad_left();
71 const int pad_top = conv_info.pad_top();
// Extent covered by the dilated filter: filter + (dilation - 1) * (filter - 1).
// Held as float so that the "/ 2" below is a floating-point division.
73 const float patch_width = (filter_width + (dilation.x() - 1) * (filter_width - 1));
74 const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
// Half extents: the float -> int conversion truncates (rounds down for these
// non-negative values), std::ceil rounds up; the two differ for odd patch sizes.
76 const int patch_half_width_floor = patch_width / 2;
77 const int patch_half_height_floor = patch_height / 2;
79 const auto patch_half_width_ceil =
static_cast<int>(std::ceil(patch_width / 2));
80 const auto patch_half_height_ceil =
static_cast<int>(std::ceil(patch_height / 2));
// First patch centre: shifted by the padding so the first patch starts at
// -pad_left / -pad_top.
82 const int minimum_x = -pad_left + patch_half_width_floor;
83 const int minimum_y = -pad_top + patch_half_height_floor;
// Distance from the first to the last patch centre: stride * (output size - 1).
84 const int maximum_x = (conv_info.stride().first * (dst_shape[0] - 1));
85 const int maximum_y = (conv_info.stride().second * (dst_shape[1] - 1));
// Zero constant; presumably the value substituted for out-of-bounds reads - its
// use is on a line not shown, TODO confirm.
87 const T border_value(0);
// Loop over batches.
90 for(
int r = 0; r < num_batches; ++r)
// Loop over the depth multiplier (the enclosing z loop is on a line not shown).
94 for(
unsigned int m = 0; m < depth_multiplier; ++m)
// Output channel index derived from z and m.
96 const int out_z = z * depth_multiplier + m;
// Sweep the patch centres row by row, advancing by the y stride.
98 for(
int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
// Sweep the patch centres along a row, advancing by the x stride.
100 for(
int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
// Coordinates of the current patch centre (x, y, z, batch).
102 Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
// Offset of filter plane number out_z, counted in weights.
103 size_t filter_offset = filter_plane * out_z;
// Visit the patch rows around y, stepping by the y dilation.
106 for(
int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
// Visit the patch columns around x, stepping by the x dilation.
108 for(
int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
// Add the bias of channel out_z to val, saturate to T and store it in the next
// output slot. val and out_pos are declared on lines not shown.
117 dst[out_pos++] =
saturate_cast<T>(val + *
static_cast<const T *
>(biases(Coordinates(out_z))));
// Reference depthwise convolution for quantized tensors.
//
// NOTE(review): this is a partial listing. Each statement is prefixed with its
// line number in the original file and the numbering has gaps, so the braces,
// several declarations (val, out_pos, z, in_val, is_quantized_per_channel) and
// whatever happens to val between original lines 217 and 226 are not shown here.
//
// Template parameters:
//   T  - element type of src and of the returned tensor.
//   TW - element type of weights.
//   TB - not used by the visible signature (biases is SimpleTensor<int32_t>).
//
// Parameters, as used by the visible lines:
//   src              - input tensor; also provides the data type of dst and the
//                      fallback quantization info.
//   weights          - filter tensor, read linearly through weights.data().
//   biases           - int32 biases, read at Coordinates(out_z).
//   dst_shape        - output shape; dimensions 0 and 1 bound the x / y sweeps.
//   conv_info        - supplies pad_left(), pad_top() and the (x, y) strides.
//   depth_multiplier - upper bound of the m loop; combined with z to form out_z.
//   dilation         - step between filter taps along x and y.
//   out_quant_info   - output quantization; when its uniform() info is empty the
//                      quantization info of src is used instead.
137 template <
typename T,
typename TW,
typename TB>
138 SimpleTensor<T> depthwise_convolution_quantized(
const SimpleTensor<T> &src,
const SimpleTensor<TW> &weights,
const SimpleTensor<int32_t> &biases,
const TensorShape &dst_shape,
139 const PadStrideInfo &conv_info,
unsigned int depth_multiplier,
const Size2D &dilation,
const QuantizationInfo &out_quant_info)
// Fall back to the input's quantization info when no output info was supplied.
142 const QuantizationInfo &dst_qinfo = out_quant_info.uniform().empty() ? src.quantization_info() : out_quant_info;
// Output tensor with the requested shape, the input's data type and dst_qinfo.
// The literal 1 is presumably the channel count - TODO confirm against SimpleTensor.
143 SimpleTensor<T>
dst{
dst_shape, src.data_type(), 1, dst_qinfo };
// Input and weight offsets are stored negated so they can simply be added to
// the raw values in the accumulation below.
146 const int input_offset = -src.quantization_info().uniform().offset;
147 const float input_scale = src.quantization_info().uniform().scale;
148 const int weights_offset = -weights.quantization_info().uniform().offset;
149 const int output_offset = dst_qinfo.uniform().offset;
150 const float output_scale = dst_qinfo.uniform().scale;
// All weight scales; indexed by out_z or by 0 further down.
152 const std::vector<float> weights_scale_vec = weights.quantization_info().scale();
// Filter and input extents, and the number of weights in one 2D filter plane.
155 const int filter_width = weights.shape().
x();
156 const int filter_height = weights.shape().y();
157 const int filter_plane = filter_width * filter_height;
158 const int input_width = src.shape().x();
159 const int input_height = src.shape().y();
160 const int input_depth = src.shape().z();
// Batch count = total elements / elements per (width x height x depth) volume.
161 const int num_batches = src.shape().total_size() / (input_width * input_height *
input_depth);
163 const int pad_left = conv_info.pad_left();
164 const int pad_top = conv_info.pad_top();
// Extent covered by the dilated filter: filter + (dilation - 1) * (filter - 1).
// Held as float so that the "/ 2" below is a floating-point division.
166 const float patch_width = (filter_width + (dilation.x() - 1) * (filter_width - 1));
167 const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
// Half extents: the float -> int conversion truncates (rounds down for these
// non-negative values), std::ceil rounds up; the two differ for odd patch sizes.
169 const int patch_half_width_floor = patch_width / 2;
170 const int patch_half_height_floor = patch_height / 2;
172 const auto patch_half_width_ceil =
static_cast<int>(std::ceil(patch_width / 2));
173 const auto patch_half_height_ceil =
static_cast<int>(std::ceil(patch_height / 2));
// First patch centre: shifted by the padding so the first patch starts at
// -pad_left / -pad_top.
175 const int minimum_x = -pad_left + patch_half_width_floor;
176 const int minimum_y = -pad_top + patch_half_height_floor;
// Distance from the first to the last patch centre: stride * (output size - 1).
177 const int maximum_x = (conv_info.stride().first * (dst_shape[0] - 1));
178 const int maximum_y = (conv_info.stride().second * (dst_shape[1] - 1));
// Largest value representable by T; presumably the upper clamp applied when the
// accumulator is requantized on lines not shown - TODO confirm.
183 const int max = std::numeric_limits<T>::max();
// Loop over batches.
186 for(
int r = 0; r < num_batches; ++r)
// Loop over the depth multiplier (the enclosing z loop is on a line not shown).
190 for(
unsigned int m = 0; m < depth_multiplier; ++m)
// Output channel index derived from z and m, and the bias of that channel.
192 const int out_z = z * depth_multiplier + m;
193 const int32_t bias_val = *
static_cast<const int32_t *
>(biases(Coordinates(out_z)));
// Zero-initialised here; presumably filled in from `multiplier` on a line not
// shown - TODO confirm.
195 int output_multiplier = 0;
196 int output_shift = 0;
// Per-channel quantization picks the scale of channel out_z, otherwise the
// single scale at index 0. is_quantized_per_channel is declared on a line not shown.
197 const float weights_scale = (is_quantized_per_channel) ? weights_scale_vec[out_z] : weights_scale_vec[0];
// Real-valued rescale factor: input scale * weights scale / output scale.
198 const float multiplier = input_scale * weights_scale / output_scale;
// Sweep the patch centres row by row, advancing by the y stride.
201 for(
int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
// Sweep the patch centres along a row, advancing by the x stride.
203 for(
int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
// Coordinates of the current patch centre (x, y, z, batch).
205 Coordinates coords(x, y, z, r);
// Offset of filter plane number out_z, counted in weights.
206 int filter_offset = filter_plane * out_z;
// Visit the patch rows around y, stepping by the y dilation.
209 for(
int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
// Visit the patch columns around x, stepping by the x dilation.
211 for(
int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
// Read the weight at filter_offset and accumulate the product of the
// offset-corrected input and weight. in_val is declared on a line not shown.
216 const TW w_val = *(weights.data() + filter_offset);
217 val += (in_val + input_offset) * (w_val + weights_offset);
// Store val in the next output slot. NOTE(review): any bias addition and
// requantization of val would sit on the lines not shown before this one.
226 dst[out_pos++] = val;
// NOTE(review): only the return statements of five wrapper definitions are
// visible here; their signatures are on lines not shown. Each one forwards the
// same eight arguments unchanged to one of the two implementations.
// Two wrappers forward to the non-quantized implementation.
241 return depthwise_convolution_fp(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
248 return depthwise_convolution_fp(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
// uint8_t input/output, uint8_t weights, int32_t bias.
255 return depthwise_convolution_quantized<uint8_t, uint8_t, int32_t>(
src, weights, biases,
dst_shape,
conv_info, depth_multiplier, dilation, out_quant_info);
// uint8_t input/output, int8_t weights, int32_t bias.
262 return depthwise_convolution_quantized<uint8_t, int8_t, int32_t>(
src, weights, biases,
dst_shape,
conv_info, depth_multiplier, dilation, out_quant_info);
// int8_t input/output, int8_t weights, int32_t bias.
269 return depthwise_convolution_quantized<int8_t, int8_t, int32_t>(
src, weights, biases,
dst_shape,
conv_info, depth_multiplier, dilation, out_quant_info);
T tensor_elem_at(const SimpleTensor< T > &src, Coordinates coord, BorderMode border_mode, T constant_border_value)
const size_t input_height
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
T x() const
Alias to access the size of the first dimension.
Quantization information.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
bool is_data_type_quantized_per_channel(DataType dt)
Check if a given data type is of per channel type.
Padding and stride information class.
int32_t quantize_down_scale_by_fixedpoint(int32_t val, int32_t result_mult_int, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max)
Quantize down the input value in range [min, max].
Simple tensor object that stores elements in a consecutive chunk of memory.
SimpleTensor< float > depthwise_convolution(const SimpleTensor< float > &src, const SimpleTensor< float > &weights, const SimpleTensor< float > &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
Class for specifying the size of an image or rectangle.
T saturate_cast(T val)
Saturate a value of type T against the numeric limits of type U.