42 const int stride_x =
conv_info.stride().first;
43 const int stride_y =
conv_info.stride().second;
44 const int kernel_width = kernel_dims.
width;
45 const int kernel_height = kernel_dims.
height;
48 const int src_width =
src.shape().x();
49 const int src_height =
src.shape().y();
50 const int src_channels =
src.shape().z();
51 const int batches =
src.shape().total_size_upper(3);
52 const int dst_height =
dst.shape().y();
61 for(
int g = 0; g < static_cast<int>(
num_groups); ++g)
63 const int first_group_ch = g * (src_channels /
num_groups);
64 const int last_group_ch = (g + 1) * (src_channels /
num_groups);
66 for(
int yo = 0; yo < dst_height; ++yo)
69 const int xi = (yo % convolved_dims.first) * stride_x;
70 const int yi = (yo / convolved_dims.first) * stride_y;
72 for(
int ci = first_group_ch;
ci < last_group_ch; ++
ci)
74 for(
int yk = 0; yk < kernel_height; ++yk)
76 for(
int xk = 0; xk < kernel_width; ++xk)
85 dst[dst_idx++] =
static_cast<T
>(1);
96 const int stride_x =
conv_info.stride().first;
97 const int stride_y =
conv_info.stride().second;
98 const int kernel_width = kernel_dims.
width;
99 const int kernel_height = kernel_dims.
height;
101 const int pad_y =
conv_info.pad().second;
102 const int src_width =
src.shape().y();
103 const int src_height =
src.shape().z();
104 const int src_channels =
src.shape().x();
105 const int batches =
src.shape().total_size_upper(3);
106 const int dst_width =
has_bias ?
dst.shape().x() - 1 :
dst.shape().x();
107 const int dst_height =
dst.shape().y();
113 #pragma omp parallel for schedule(dynamic, 1) collapse(2)
117 for(
int yo = 0; yo < dst_height; ++yo)
120 const int xi = (yo % convolved_dims.first) * stride_x;
121 const int yi = (yo / convolved_dims.first) * stride_y;
123 for(
int ci = 0;
ci < src_channels; ++
ci)
125 for(
int yk = 0; yk < kernel_height; ++yk)
127 for(
int xk = 0; xk < kernel_width; ++xk)
137 dst[dst_width + yo *
dst.shape().x() +
b *
dst.shape().x() *
dst.shape().y()] =
static_cast<T
>(1);
143 template <
typename T>
146 switch(
src.data_layout())