43 const ITensorInfo *output,
51 if (output->total_size() != 0)
53 const TensorShape expected_output_shape =
55 const TensorInfo expected_output_info =
input->clone()->set_tensor_shape(expected_output_shape);
// Constant-mode padding for element type T: every output row (dimension 0) is
// either filled entirely with the constant value or assembled as
// [left padding | copy of the input row | right padding].
// NOTE(review): this excerpt omits several original lines (braces, the
// declarations of `idin` and `element_size`, the call that drives the lambda);
// the comments below describe only the statements that are visible.
64 void NEPadLayerKernel::run_pad_constant(
const Window &window)
// Work on a local copy of the execution window.
66 Window output_window{
window};
// Iterator that walks the output tensor over output_window.
70 Iterator output_it(_output, output_window);
// Callback receiving the coordinates `id` of the current position; captures by reference.
73 [&](
const Coordinates &
id)
// Visit the padded dimensions from the highest index down to 1; dimension 0 is
// handled separately below.
// NOTE(review): if _padding were empty, `_padding.size() - 1` would wrap around
// as size_t; confirm the caller guarantees at least one padding entry.
76 for (
size_t dim = _padding.size() - 1; dim > 0; --dim)
// Remove the leading padding of this dimension from the coordinate.
78 idin[dim] -= _padding[dim].first;
// The shifted coordinate lies outside the input extent along this dimension.
79 if (idin[dim] < 0 || static_cast<int>(_input->info()->dimension(dim)) - 1 < idin[dim])
// Fill the whole output row (output dimension 0 elements) with the constant value.
81 std::fill_n(reinterpret_cast<T *>(output_it.ptr()), _output->info()->dimension(0),
82 _constant_value.get<T>());
// Typed pointer to the input element addressed by idin.
86 T *input_it_ptr =
reinterpret_cast<T *
>(_input->
ptr_to_element(idin));
// Typed pointer to the current output position.
87 T *output_it_ptr =
reinterpret_cast<T *
>(output_it.ptr());
// Left padding of the row: _padding[0].first constant elements.
88 std::fill_n(output_it_ptr, _padding[0].first, _constant_value.
get<T>());
// Copy one input row (input dimension 0 elements, element_size bytes each)
// right after the left padding.
89 memcpy(output_it_ptr + _padding[0].first, input_it_ptr, _input->
info()->
dimension(0) * element_size);
// Right padding of the row: _padding[0].second constant elements after the copied data.
90 std::fill_n(output_it_ptr + _padding[0].first + _input->
info()->
dimension(0), _padding[0].second,
91 _constant_value.
get<T>());
// Constant-mode padding written with raw memset/memcpy over uint8_t pointers,
// handling padding on up to three dimensions (X = 0, Y = 1, Z = 2). The output
// is written strictly sequentially: output_row_ptr only ever moves forward.
// NOTE(review): this excerpt omits several original lines (braces and branch
// keywords); the comments below describe only the statements that are visible.
// NOTE(review): offsets are computed as element counts and added to uint8_t
// pointers, and rows/planes are assumed to be contiguous — presumably this path
// is only selected for one-byte elements with no stride padding; confirm
// against the selection logic in configure.
96 void NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad(
const Window &window)
// Range of output planes (Z) assigned to this invocation.
100 const size_t start_plane = window.z().start();
101 const size_t end_plane = window.z().end();
// First input plane to read: the output plane index minus the leading Z
// padding, clamped to 0 when the window starts inside that padding.
103 size_t start_plane_input = start_plane;
104 if (_padding.size() > 2)
106 start_plane_input = (start_plane < _padding[2].first) ? 0 : start_plane - _padding[2].first;
// Number of elements in one XY plane of the output and of the input.
// NOTE(review): the size_t products are narrowed to int here.
108 const int output_plane_size = _output->info()->dimension(0) * _output->info()->dimension(1);
109 const int input_plane_size = _input->info()->dimension(0) * _input->info()->dimension(1);
// Element counts of the top and bottom Y padding: padded rows times the output
// row length; zero when no Y padding entry exists.
111 const int pad_y_elems_top = (_padding.size() > 1 ? _padding[1].first : 0) * _output->info()->dimension(0);
112 const int pad_y_elems_bot = (_padding.size() > 1 ? _padding[1].second : 0) * _output->info()->dimension(0);
// Advance applied to the input pointer after each copied row (one input row).
114 const size_t jump_to_next_row_input = _input->info()->dimension(0);
// Despite the name this is the combined X padding (left + right): the distance
// from the end of one row's data to the start of the next row's data.
115 const size_t jump_to_next_row_output = _padding[0].first + _padding[0].second;
// Output write cursor, positioned at the first plane of this window.
117 uint8_t *output_row_ptr =
118 _output->buffer() + _output->info()->offset_first_element_in_bytes() + start_plane * output_plane_size;
// Input read cursor, positioned at the first input plane needed.
119 const uint8_t *input_it_ptr =
120 _input->buffer() + _input->info()->offset_first_element_in_bytes() + start_plane_input * input_plane_size;
// Constant used for every padded byte.
121 const auto pad_value = _constant_value.get<uint8_t>();
// Process the assigned output planes in order.
123 for (
size_t z_i = start_plane; z_i < end_plane; ++z_i)
// Plane inside the leading Z padding: fill the whole plane with the constant.
125 if (_padding.size() > 2 && z_i < _padding[2].first)
127 memset(output_row_ptr, pad_value, output_plane_size);
128 output_row_ptr += output_plane_size;
// Plane beyond the last input plane (trailing Z padding): fill the whole plane.
130 else if (_padding.size() > 2 && z_i > (_input->info()->dimension(2) + _padding[2].first - 1))
132 memset(output_row_ptr, pad_value, output_plane_size);
133 output_row_ptr += output_plane_size;
// Plane that carries input data (presumably the remaining else-branch; its
// keyword is on a line not shown). Start with the top Y padding rows.
137 memset(output_row_ptr, pad_value, pad_y_elems_top);
138 output_row_ptr += pad_y_elems_top;
// Input rows still to be copied for this plane.
139 size_t y_i = _input->info()->dimension(1);
// Manually unrolled loop: four input rows per iteration.
141 for (; y_i > 3; y_i -= 4)
// Left padding of the first row.
143 memset(output_row_ptr, pad_value, _padding[0].first);
144 output_row_ptr += _padding[0].first;
// Row 1 data.
146 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
147 output_row_ptr += _input->info()->dimension(0);
148 input_it_ptr += jump_to_next_row_input;
// Right padding of row 1 and left padding of row 2 written in a single memset.
150 memset(output_row_ptr, pad_value, _padding[0].second + _padding[0].first);
151 output_row_ptr += jump_to_next_row_output;
// Row 2 data.
153 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
154 output_row_ptr += _input->info()->dimension(0);
155 input_it_ptr += jump_to_next_row_input;
// Right padding of row 2 and left padding of row 3.
157 memset(output_row_ptr, pad_value, _padding[0].second + _padding[0].first);
158 output_row_ptr += jump_to_next_row_output;
// Row 3 data.
160 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
161 output_row_ptr += _input->info()->dimension(0);
162 input_it_ptr += jump_to_next_row_input;
// Right padding of row 3 and left padding of row 4.
164 memset(output_row_ptr, pad_value, _padding[0].second + _padding[0].first);
165 output_row_ptr += jump_to_next_row_output;
// Row 4 data.
167 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
168 output_row_ptr += _input->info()->dimension(0);
169 input_it_ptr += jump_to_next_row_input;
// Right padding of row 4 only; the next iteration writes its own left padding.
171 memset(output_row_ptr, pad_value, _padding[0].second);
172 output_row_ptr += _padding[0].second;
// Remaining rows (fewer than four), one row per iteration.
174 for (; y_i > 0; --y_i)
// Left padding of the row.
176 memset(output_row_ptr, pad_value, _padding[0].first);
177 output_row_ptr += _padding[0].first;
// Row data; the input cursor advances by one input row.
179 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
180 output_row_ptr += _input->info()->dimension(0);
181 input_it_ptr += _input->info()->dimension(0);
// Right padding of the row.
183 memset(output_row_ptr, pad_value, _padding[0].second);
184 output_row_ptr += _padding[0].second;
// Bottom Y padding rows of this plane.
186 memset(output_row_ptr, pad_value, pad_y_elems_bot);
187 output_row_ptr += pad_y_elems_bot;
// Default constructor: value-initializes the run-function pointer, padding
// list, constant value and mode, and sets the input/output tensor pointers to
// nullptr (the constructor body is on lines not shown in this excerpt).
192 NEPadLayerKernel::NEPadLayerKernel()
193 : _func(), _input(nullptr), _output(nullptr), _padding(), _constant_value(), _mode()
207 const TensorInfo expected_output_info =
input->info()->clone()->set_tensor_shape(expected_output_shape);
216 _constant_value = constant_value;
225 padding.size() <= 3 &&
228 _func = &NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad;
232 _func = &NEPadLayerKernel::run_pad_constant<uint8_t>;
236 _func = &NEPadLayerKernel::run_pad_constant<uint16_t>;
239 _func = &NEPadLayerKernel::run_pad_constant<uint32_t>;
256 ICPPKernel::configure(win);
276 if (_func !=
nullptr)