23.11
|
Go to the documentation of this file.
44 template <
typename T,
typename TW,
typename TB>
51 const int width_in =
src.shape().x();
52 const int height_in =
src.shape().y();
53 const int depth_in =
src.shape().z();
54 const int width_out =
dst.shape().x();
55 const int height_out =
dst.shape().y();
56 const int depth_out =
dst.shape().z();
57 const int width_weights = weights.
shape().x();
58 const int height_weights = weights.
shape().y();
59 const int depth_weights = weights.
shape().z();
60 const int pad_left =
info.pad_left();
61 const int pad_top =
info.pad_top();
62 const int stride_xi =
info.stride().first;
63 const int stride_yi =
info.stride().second;
67 const int start_xi = (dilation.
x() * (width_weights - 1) + 1) / 2 - pad_left;
68 const int start_yi = (dilation.
y() * (height_weights - 1) + 1) / 2 - pad_top;
69 const int end_xi =
output_wh.first * stride_xi;
70 const int end_yi =
output_wh.second * stride_yi;
71 const int num_batches =
src.shape().total_size() / (width_in * height_in * depth_in);
73 #if defined(_OPENMP) && !( defined(__arm__) && defined(__ANDROID__))
74 #pragma omp parallel for collapse(5)
76 for(
int r = 0; r < num_batches; ++r)
78 for(
int yi = start_yi; yi < start_yi + end_yi; yi += stride_yi)
80 for(
int xi = start_xi; xi < start_xi + end_xi; xi += stride_xi)
82 for(
int group = 0; group < static_cast<int>(
num_groups); ++group)
84 for(
int ofm = 0; ofm < static_cast<int>(depth_out /
num_groups); ++ofm)
87 const int offset_in = r * width_in * height_in * depth_in + (group * (depth_in /
num_groups) * width_in * height_in);
88 const int xo = (xi - start_xi) / stride_xi;
89 const int yo = (yi - start_yi) / stride_yi;
90 const int offset_out = xo + yo * width_out + ((ofm + group * (depth_out /
num_groups)) * width_out * height_out) + (r * width_out * height_out * depth_out);
91 const int offset_w = (ofm + group * (depth_out /
num_groups)) * width_weights * height_weights * depth_weights;
92 const int offset_b = (ofm + group * (depth_out /
num_groups));
99 offset_in, offset_w, offset_b, offset_out,
102 width_weights, height_weights, dilation.
x(), dilation.
y(), ofm);
110 template <
typename T,
typename TW,
typename TB>
117 out_quant_info =
src.quantization_info();
size_t y() const
Semantic accessor for height as y.
SimpleTensor< float > src
Quantization information.
std::pair< unsigned int, unsigned int > scaled_dimensions(int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation=Size2D(1U, 1U))
Returns the expected width and height of the scaled output tensor, depending on the dimension rounding mode.
Class for specifying the size of an image or rectangle.
TensorShape shape() const override
Shape of the tensor.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true, an error message is printed and an exception is thrown.
SimpleTensor< T > convolution_layer_nchw(const SimpleTensor< T > &src, const SimpleTensor< TW > &weights, const SimpleTensor< TB > &bias, SimpleTensor< T > &dst, const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups)
#define ARM_COMPUTE_ASSERT(cond)
size_t x() const
Semantic accessor for width as x.
SimpleTensor< T > convolution_layer(const SimpleTensor< T > &src, const SimpleTensor< TW > &weights, const SimpleTensor< TB > &bias, const TensorShape &output_shape, const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info)
Simple tensor object that stores elements in a consecutive chunk of memory.
const unsigned int num_groups
void convolution3d(const SimpleTensor< T > &in, const SimpleTensor< TW > &weights, const SimpleTensor< TB > &bias, SimpleTensor< T > &out, int i_offset, int w_offset, int b_offset, int o_offset, int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x=1, int dilation_y=1, int filter_id=0)
Copyright (c) 2017-2023 Arm Limited.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)