24 #ifndef SRC_CORE_NEON_KERNELS_CROP_IMPL_H
25 #define SRC_CORE_NEON_KERNELS_CROP_IMPL_H
// NOTE(review): this excerpt is fragmentary. The function name, template header,
// leading parameters, braces and several statements are not visible here, so the
// comments below describe only what the visible lines do.
//
// Visible behaviour: elements of type T are read from `input` through
// input->ptr_to_element(...), converted to float and written through `output_ptr`,
// either in steps of `window_step_x` or one element at a time.
//
// Trailing parameters visible in this excerpt:
//   window_step_x            - step used by the stepped loops below.
//   output_width_start       - first x index that is processed.
//   output_width_limit       - exclusive upper bound for the x index.
//   input_has_single_channel - selects the branch tested right after the signature.
//   is_width_flipped         - not referenced in the visible lines; presumably selects
//                              the paths that walk coordinate 1 backwards - TODO confirm.
43 int32_t window_step_x,
44 int32_t output_width_start,
45 int32_t output_width_limit,
46 bool input_has_single_channel,
47 bool is_width_flipped)
// Single-channel case: x runs over [output_width_start, output_width_limit).
53 if (input_has_single_channel)
55 int32_t x = output_width_start;
// Lower coordinate 1 of negative_offset by (window_step_x - 1) before the stepped loop.
// (negative_offset is declared on a line not shown in this excerpt.)
57 negative_offset.
set(1, negative_offset[1] - window_step_x + 1);
// Stepped loop: runs while a full step of window_step_x still fits below
// output_width_limit; x moves forward while negative_offset[1] moves backward.
58 for (; x <= output_width_limit - window_step_x; x += window_step_x, negative_offset[1] -= window_step_x)
// load_as_f32 is defined elsewhere; presumably it loads a group of T values as
// floats - TODO confirm. The use of `in` is on lines not shown here.
60 auto in =
load_as_f32(
reinterpret_cast<T *
>(
input->ptr_to_element(negative_offset)));
// Undo the (window_step_x - 1) shift so the element-wise tail starts from the
// matching coordinate.
67 input_offset[1] = negative_offset[1] + window_step_x - 1;
// Tail loop: one element per iteration; x increases while input_offset[1] decreases.
68 for (; x < output_width_limit; ++x, --input_offset[1])
// Read one T at input_offset, convert it to float and store it at output_ptr + x.
70 *(output_ptr + x) =
static_cast<float>(*
reinterpret_cast<T *
>(
input->ptr_to_element(input_offset)));
// Presumably the alternative to the single-channel branch (the enclosing lines are
// not visible): for every x, coordinate 1 of input_offset is decremented and
// coordinate 0 is walked from 0.
75 for (int32_t x = output_width_start; x < output_width_limit; ++x, --input_offset[1])
// Restart coordinate 0 at 0 for this x.
77 input_offset.
set(0, 0);
// Stepped loop over coordinate 0, bounded by input->info()->dimension(0).
// The declaration of `c` is not visible in this excerpt.
79 for (; c <= static_cast<int32_t>(
input->info()->dimension(0)) - window_step_x;
80 c += window_step_x, input_offset[0] += window_step_x)
82 auto in =
load_as_f32(
reinterpret_cast<T *
>(
input->ptr_to_element(input_offset)));
// Tail loop over the remaining values of c, one element at a time.
85 for (; c < static_cast<int32_t>(
input->info()->dimension(0)); ++c, ++input_offset[0])
// Scalar T -> float conversion; the destination of this store is on a line not shown.
88 static_cast<float>(*
reinterpret_cast<T *
>(
input->ptr_to_element(input_offset)));
// When T is float no conversion is needed, so the data is copied with memcpy.
96 if (std::is_same<T, float>::value)
// Destination: output_ptr + output_width_start * output->info()->dimension(0).
// Source: input->ptr_to_element(input_offset).
// Size: (output_width_limit - output_width_start) * output->info()->dimension(0) times
// a final factor that lies on a line not shown in this excerpt.
98 memcpy(
static_cast<void *
>(output_ptr + output_width_start * output->
info()->
dimension(0)),
99 reinterpret_cast<const void *
>(
input->ptr_to_element(input_offset)),
100 (output_width_limit - output_width_start) * output->
info()->
dimension(0) *
// Presumably the initialiser of `limit` used by the loops below (its declaration line
// is not visible): the x extent multiplied by output->info()->dimension(0).
107 (output_width_limit - output_width_start) *
static_cast<int32_t
>(output->
info()->
dimension(0));
// Output position corresponding to output_width_start.
108 float *output_start_ptr = output_ptr + output_width_start * output->
info()->
dimension(0);
// Stepped loop: x and input_offset[0] both advance by window_step_x while a full
// step still fits below `limit`.
109 for (; x <= limit - window_step_x; x += window_step_x, input_offset[0] += window_step_x)
111 auto in =
load_as_f32(
reinterpret_cast<T *
>(
input->ptr_to_element(input_offset)));
// Tail loop: convert the remaining elements one at a time.
114 for (; x < limit; ++x, ++input_offset[0])
// Read one T at input_offset, convert it to float and store it at output_start_ptr + x.
116 *(output_start_ptr + x) =
117 static_cast<float>(*
reinterpret_cast<T *
>(
input->ptr_to_element(input_offset)));
124 #endif //SRC_CORE_NEON_KERNELS_CROP_IMPL_H