// Depthwise convolution, non-quantized path: the accumulated value plus the
// per-output-channel bias is passed through saturate_cast<T> and stored.
// NOTE(review): this text looks like a sparse excerpt -- the leading numbers
// (54, 55, 62, ...) appear to be line numbers of the original file, and the
// lines in between (braces, some declarations) are not visible here.
//
// Parameters as used in the visible lines:
//   src              - input tensor; x/y/z of its shape are read as width/height/depth.
//   weights          - filter tensor; x/y of its shape are read as filter width/height.
//   biases           - indexed with Coordinates(out_z), one value per output channel.
//   dst_shape        - not referenced in the visible lines.
//   conv_info        - supplies pad_left() and stride() (first = x step, second = y step).
//   depth_multiplier - number of output channels produced per input channel.
//   dilation         - x()/y() step between sampled elements inside the patch.
//   out_quant_info   - not referenced in the visible lines.
54 SimpleTensor<T> depthwise_convolution_fp(
const SimpleTensor<T> &
src,
const SimpleTensor<T> &weights,
const SimpleTensor<T> &biases,
const TensorShape &
dst_shape,
const PadStrideInfo &
conv_info,
55 unsigned int depth_multiplier,
const Size2D &dilation,
const QuantizationInfo &out_quant_info)
// Filter geometry: filter_plane is the element count of one 2D filter.
62 const int filter_width = weights.shape().x();
63 const int filter_height = weights.shape().y();
64 const int filter_plane = filter_width * filter_height;
// Input geometry; the batch count is whatever remains of total_size after
// dividing out width * height * depth.
65 const int input_width =
src.shape().x();
66 const int input_height =
src.shape().y();
67 const int input_depth =
src.shape().z();
68 const int num_batches =
src.shape().total_size() / (input_width * input_height * input_depth);
70 const int pad_left =
conv_info.pad_left();
// Extent of the dilated filter: filter + (dilation - 1) * (filter - 1).
// Held as float so that "/ 2" below is a floating-point division: assigning
// it to int truncates (floor half), while std::ceil rounds it up (ceil half).
73 const float patch_width = (filter_width + (dilation.x() - 1) * (filter_width - 1));
74 const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
76 const int patch_half_width_floor = patch_width / 2;
77 const int patch_half_height_floor = patch_height / 2;
79 const auto patch_half_width_ceil =
static_cast<int>(std::ceil(patch_width / 2));
80 const auto patch_half_height_ceil =
static_cast<int>(std::ceil(patch_height / 2));
// First patch-centre position on each axis, shifted into the padded region.
// NOTE(review): pad_top is declared on a line not visible in this excerpt.
82 const int minimum_x = -pad_left + patch_half_width_floor;
83 const int minimum_y = -pad_top + patch_half_height_floor;
// Value of type T constructed from 0; its use is not visible here
// (presumably the value read for out-of-bounds taps -- confirm).
87 const T border_value(0);
// Loop nest: batch (r) -> input channel (z) -> multiplier index (m).
90 for(
int r = 0; r < num_batches; ++r)
92 for(
int z = 0; z < input_depth; ++z)
94 for(
unsigned int m = 0; m < depth_multiplier; ++m)
// Output channel: each input channel z owns depth_multiplier consecutive outputs.
96 const int out_z = z * depth_multiplier + m;
// Walk patch centres with the convolution stride; both bounds are inclusive.
// NOTE(review): maximum_x / maximum_y are declared on lines not visible here.
98 for(
int y = minimum_y; y <= minimum_y + maximum_y; y +=
conv_info.stride().second)
100 for(
int x = minimum_x; x <= minimum_x + maximum_x; x +=
conv_info.stride().first)
102 Coordinates coords(
static_cast<int>(x),
static_cast<int>(y),
static_cast<int>(z),
static_cast<int>(r));
// Offset of the first weight of the filter plane belonging to out_z.
103 size_t filter_offset = filter_plane * out_z;
// Visit the patch taps: from (centre - floor half) up to, but excluding,
// (centre + ceil half), advancing by the dilation on each axis.
106 for(
int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
108 for(
int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
// Add the bias for this output channel and store through saturate_cast<T>.
// NOTE(review): val, dst and out_pos are declared on lines not visible here.
117 dst[out_pos++] = saturate_cast<T>(val + *
static_cast<const T *
>(biases(Coordinates(out_z))));
// Depthwise convolution, quantized path: accumulates
// (input + input_offset) * (weight + weights_offset) over each dilated patch.
// NOTE(review): this text looks like a sparse excerpt -- the leading numbers
// appear to be line numbers of the original file, and the lines in between
// (braces, some declarations, the steps between accumulation and store) are
// not visible here.
//
// Template parameters: T = input/output element type, TW = weight element
// type, TB = third type parameter (not referenced in the visible lines;
// biases are declared as int32_t directly).
//
// Parameters as used in the visible lines:
//   src              - input tensor; supplies shape and quantization info.
//   weights          - filter tensor; supplies shape, quantization info and raw data().
//   biases           - int32_t tensor indexed with Coordinates(out_z).
//   dst_shape        - not referenced in the visible lines.
//   conv_info        - supplies pad_left() and stride() (first = x step, second = y step).
//   depth_multiplier - number of output channels produced per input channel.
//   dilation         - x()/y() step between sampled elements inside the patch.
//   out_quant_info   - output quantization; replaced by src's when its uniform() is empty.
137 template <
typename T,
typename TW,
typename TB>
138 SimpleTensor<T> depthwise_convolution_quantized(
const SimpleTensor<T> &
src,
const SimpleTensor<TW> &weights,
const SimpleTensor<int32_t> &biases,
const TensorShape &
dst_shape,
139 const PadStrideInfo &
conv_info,
unsigned int depth_multiplier,
const Size2D &dilation,
const QuantizationInfo &out_quant_info)
// Fall back to the input's quantization info when none was supplied for the output.
142 const QuantizationInfo &dst_qinfo = out_quant_info.uniform().empty() ?
src.quantization_info() : out_quant_info;
// Input and weight offsets are stored negated; they are added to the raw
// values in the accumulation statement further down.
146 const int input_offset = -
src.quantization_info().uniform().offset;
147 const float input_scale =
src.quantization_info().uniform().scale;
148 const int weights_offset = -weights.quantization_info().uniform().offset;
149 const int output_offset = dst_qinfo.uniform().offset;
150 const float output_scale = dst_qinfo.uniform().scale;
// Copy of the weight scales; indexed per output channel or at [0] below.
152 const std::vector<float> weights_scale_vec = weights.quantization_info().scale();
// Filter geometry: filter_plane is the element count of one 2D filter.
155 const int filter_width = weights.shape().x();
156 const int filter_height = weights.shape().y();
157 const int filter_plane = filter_width * filter_height;
// Input geometry; the batch count is whatever remains of total_size after
// dividing out width * height * depth.
158 const int input_width =
src.shape().x();
159 const int input_height =
src.shape().y();
160 const int input_depth =
src.shape().z();
161 const int num_batches =
src.shape().total_size() / (input_width * input_height * input_depth);
163 const int pad_left =
conv_info.pad_left();
// Extent of the dilated filter: filter + (dilation - 1) * (filter - 1).
// Held as float so that "/ 2" below is a floating-point division: assigning
// it to int truncates (floor half), while std::ceil rounds it up (ceil half).
166 const float patch_width = (filter_width + (dilation.x() - 1) * (filter_width - 1));
167 const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
169 const int patch_half_width_floor = patch_width / 2;
170 const int patch_half_height_floor = patch_height / 2;
172 const auto patch_half_width_ceil =
static_cast<int>(std::ceil(patch_width / 2));
173 const auto patch_half_height_ceil =
static_cast<int>(std::ceil(patch_height / 2));
// First patch-centre position on each axis, shifted into the padded region.
// NOTE(review): pad_top is declared on a line not visible in this excerpt.
175 const int minimum_x = -pad_left + patch_half_width_floor;
176 const int minimum_y = -pad_top + patch_half_height_floor;
// Largest value representable by T, widened to int; its use is not visible
// here (presumably the upper clamp bound for the output -- confirm).
183 const int max = std::numeric_limits<T>::max();
// Loop nest: batch (r) -> input channel (z) -> multiplier index (m).
186 for(
int r = 0; r < num_batches; ++r)
188 for(
int z = 0; z < input_depth; ++z)
190 for(
unsigned int m = 0; m < depth_multiplier; ++m)
// Output channel: each input channel z owns depth_multiplier consecutive outputs.
192 const int out_z = z * depth_multiplier + m;
193 const int32_t bias_val = *
static_cast<const int32_t *
>(biases(Coordinates(out_z)));
// Initialised to 0 here; the code that fills them in is not visible in this excerpt.
195 int output_multiplier = 0;
196 int output_shift = 0;
// Per-channel quantization picks the scale of this output channel, otherwise
// element 0 is used. NOTE(review): is_quantized_per_channel is declared on a
// line not visible here.
197 const float weights_scale = (is_quantized_per_channel) ? weights_scale_vec[out_z] : weights_scale_vec[0];
// Real-valued rescale factor: input_scale * weights_scale / output_scale.
198 const float multiplier = input_scale * weights_scale / output_scale;
// Walk patch centres with the convolution stride; both bounds are inclusive.
// NOTE(review): maximum_x / maximum_y are declared on lines not visible here.
201 for(
int y = minimum_y; y <= minimum_y + maximum_y; y +=
conv_info.stride().second)
203 for(
int x = minimum_x; x <= minimum_x + maximum_x; x +=
conv_info.stride().first)
205 Coordinates coords(x, y, z, r);
// Offset of the first weight of the filter plane belonging to out_z.
206 int filter_offset = filter_plane * out_z;
// Visit the patch taps: from (centre - floor half) up to, but excluding,
// (centre + ceil half), advancing by the dilation on each axis.
209 for(
int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
211 for(
int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
// Read the weight at the current filter offset and accumulate the
// offset-corrected product. NOTE(review): in_val and val are declared on
// lines not visible here.
216 const TW w_val = *(weights.data() + filter_offset);
217 val += (in_val + input_offset) * (w_val + weights_offset);
// Store the result for this output position. NOTE(review): whatever happens
// to val between the accumulation and this store is not visible here.
226 dst[out_pos++] = val;
// Forwarding statements: each one passes the same eight arguments unchanged
// to one of the two implementations.
// NOTE(review): the enclosing function signatures are not visible in this
// excerpt, so which element types select which statement cannot be confirmed
// from here.

// Two statements forward to the non-quantized implementation.
241 return depthwise_convolution_fp(
src, weights, biases,
dst_shape,
conv_info, depth_multiplier, dilation, out_quant_info);
248 return depthwise_convolution_fp(
src, weights, biases,
dst_shape,
conv_info, depth_multiplier, dilation, out_quant_info);
// Quantized: uint8_t input/output with uint8_t weights.
255 return depthwise_convolution_quantized<uint8_t, uint8_t, int32_t>(
src, weights, biases,
dst_shape,
conv_info, depth_multiplier, dilation, out_quant_info);
// Quantized: uint8_t input/output with int8_t weights.
262 return depthwise_convolution_quantized<uint8_t, int8_t, int32_t>(
src, weights, biases,
dst_shape,
conv_info, depth_multiplier, dilation, out_quant_info);
// Quantized: int8_t input/output with int8_t weights.
269 return depthwise_convolution_quantized<int8_t, int8_t, int32_t>(
src, weights, biases,
dst_shape,
conv_info, depth_multiplier, dilation, out_quant_info);