45 namespace experimental
47 namespace dynamic_fusion
51 constexpr uint32_t opencl_vector_size_in_bytes = 16;
62 void GpuCkwResize::do_nearest_neighbor_resize(
const ComponentGroup &comp_group,
63 GpuCkwVariableTable &vtable,
64 GpuCkwScopedKernelWriter writer)
const
72 GpuCkwComponentArgument *
src = vtable.declare_variable(comp_group, writer, _src,
"src");
73 GpuCkwComponentArgument *
dst = vtable.declare_variable(comp_group, writer, _dst,
"dst");
88 auto const_src_w_i32 = writer->declare_constant_tile(ckw::ConstantData({{src_w}}, ckw::DataType::Int32));
89 auto const_src_h_i32 = writer->declare_constant_tile(ckw::ConstantData({{src_h}}, ckw::DataType::Int32));
90 auto const_dst_h_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_h}}, ckw::DataType::Int32));
91 auto const_pos_1_i32 = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
92 auto const_0_i32 = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
93 auto const_0_fp = writer->declare_constant_tile(ckw::ConstantData({{0.0f}}, dst_dt));
94 auto const_pos_0_5_fp = writer->declare_constant_tile(ckw::ConstantData({{0.5f}}, ckw::DataType::Fp32));
95 auto const_scale_x_fp = writer->declare_constant_tile(ckw::ConstantData({{
scale_x}}, ckw::DataType::Fp32));
96 auto const_scale_y_fp = writer->declare_constant_tile(ckw::ConstantData({{
scale_y}}, ckw::DataType::Fp32));
104 const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
107 const int32_t dst_n0 = root_window.x().step();
113 const int32_t dst_n0_partial = _dst->
dimension(0) % dst_n0;
116 const int32_t dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
118 ckw::TensorSampler sampler_dst;
119 sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
120 if (dst_n0_partial == 0)
126 sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
130 sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
133 auto tile_dst = writer->declare_tile(
"dst", ckw::TileInfo(dst_dt, 1, dst_n0));
136 writer->op_assign(tile_dst, const_0_fp);
139 dst->init_virtual_tensor(tile_dst, sampler_dst);
144 auto const_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
145 auto const_shift_back_n0_i32 =
146 writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
151 ckw::TensorSampler sampler_src;
152 sampler_src.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
166 auto tile_gid_0 = writer->declare_tile(
"gid_0", ckw::TileInfo(ckw::DataType::Int32));
167 auto tile_gid_1 = writer->declare_tile(
"gid_1", ckw::TileInfo(ckw::DataType::Int32));
168 auto tile_gid_2 = writer->declare_tile(
"gid_2", ckw::TileInfo(ckw::DataType::Int32));
170 writer->op_get_global_id(tile_gid_0, 0);
171 writer->op_get_global_id(tile_gid_1, 1);
172 writer->op_get_global_id(tile_gid_2, 2);
174 auto tile_co = writer->declare_tile(
"co", ckw::TileInfo(ckw::DataType::Int32));
175 auto tile_xo = writer->declare_tile(
"xo", ckw::TileInfo(ckw::DataType::Int32));
176 auto tile_yo = writer->declare_tile(
"yo", ckw::TileInfo(ckw::DataType::Int32));
177 auto tile_bo = writer->declare_tile(
"bo", ckw::TileInfo(ckw::DataType::Int32));
182 writer->op_assign(tile_xo, tile_gid_1);
183 writer->op_binary(tile_yo, ckw::BinaryOp::Mod, tile_gid_2, const_dst_h_i32);
184 writer->op_binary(tile_bo, ckw::BinaryOp::Div, tile_gid_2, const_dst_h_i32);
189 auto tile_xi_f = writer->declare_tile(
"xi_f", ckw::DataType::Fp32);
190 auto tile_yi_f = writer->declare_tile(
"yi_f", ckw::DataType::Fp32);
199 writer->op_binary(tile_xi_f, ckw::BinaryOp::Mul, tile_xi_f, const_scale_x_fp);
200 writer->op_binary(tile_yi_f, ckw::BinaryOp::Mul, tile_yi_f, const_scale_y_fp);
206 const auto &tile_xo_plus_half = writer->declare_tile(
"xo_plus_half", ckw::DataType::Fp32);
207 const auto &tile_yo_plus_half = writer->declare_tile(
"yo_plus_half", ckw::DataType::Fp32);
211 writer->op_binary(tile_xo_plus_half, ckw::BinaryOp::Add, tile_xo_plus_half, const_pos_0_5_fp);
212 writer->op_binary(tile_yo_plus_half, ckw::BinaryOp::Add, tile_yo_plus_half, const_pos_0_5_fp);
213 writer->op_binary(tile_xi_f, ckw::BinaryOp::Mul, tile_xo_plus_half, const_scale_x_fp);
214 writer->op_binary(tile_yi_f, ckw::BinaryOp::Mul, tile_yo_plus_half, const_scale_y_fp);
223 writer->op_unary(tile_xi_f, ckw::UnaryOp::Round, tile_xi_f);
224 writer->op_unary(tile_yi_f, ckw::UnaryOp::Round, tile_yi_f);
229 auto tile_xi_f_int = writer->declare_tile(
"xi_f_int", ckw::DataType::Int32);
230 auto tile_yi_f_int = writer->declare_tile(
"yi_f_int", ckw::DataType::Int32);
235 auto tile_src_w_minus_1 = writer->declare_tile(
"src_w_minus_1", ckw::DataType::Int32);
236 auto tile_src_h_minus_1 = writer->declare_tile(
"src_h_minus_1", ckw::DataType::Int32);
238 writer->op_binary(tile_src_w_minus_1, ckw::BinaryOp::Sub, const_src_w_i32, const_pos_1_i32);
239 writer->op_binary(tile_src_h_minus_1, ckw::BinaryOp::Sub, const_src_h_i32, const_pos_1_i32);
241 auto tile_xi0 = writer->declare_tile(
"xi0", ckw::DataType::Int32);
242 auto tile_yi0 = writer->declare_tile(
"yi0", ckw::DataType::Int32);
244 writer->op_ternary(tile_xi0, ckw::TernaryOp::Clamp, tile_xi_f_int, const_0_i32, tile_src_w_minus_1);
245 writer->op_ternary(tile_yi0, ckw::TernaryOp::Clamp, tile_yi_f_int, const_0_i32, tile_src_h_minus_1);
247 auto tile_src = writer->declare_tile(
"src_tile", ckw::TileInfo(dst_dt, 1, dst_n0));
248 writer->op_load(tile_src,
src->tensor(), sampler_src, tile_co, tile_xi0, tile_yi0, tile_bo);
250 writer->op_assign(tile_dst, tile_src);
253 void GpuCkwResize::do_bilinear_resize(
const ComponentGroup &comp_group,
254 GpuCkwVariableTable &vtable,
255 GpuCkwScopedKernelWriter writer)
const
263 GpuCkwComponentArgument *
src = vtable.declare_variable(comp_group, writer, _src,
"src");
264 GpuCkwComponentArgument *
dst = vtable.declare_variable(comp_group, writer, _dst,
"dst");
279 auto const_src_w_i32 = writer->declare_constant_tile(ckw::ConstantData({{src_w}}, ckw::DataType::Int32));
280 auto const_src_h_i32 = writer->declare_constant_tile(ckw::ConstantData({{src_h}}, ckw::DataType::Int32));
281 auto const_dst_h_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_h}}, ckw::DataType::Int32));
282 auto const_pos_1_i32 = writer->declare_constant_tile(ckw::ConstantData({{1}}, ckw::DataType::Int32));
283 auto const_0_i32 = writer->declare_constant_tile(ckw::ConstantData({{0}}, ckw::DataType::Int32));
284 auto const_0_fp = writer->declare_constant_tile(ckw::ConstantData({{0.0f}}, dst_dt));
285 auto const_pos_1_fp = writer->declare_constant_tile(ckw::ConstantData({{1.0f}}, ckw::DataType::Fp32));
286 auto const_pos_0_5_fp = writer->declare_constant_tile(ckw::ConstantData({{0.5f}}, ckw::DataType::Fp32));
287 auto const_scale_x_fp = writer->declare_constant_tile(ckw::ConstantData({{
scale_x}}, ckw::DataType::Fp32));
288 auto const_scale_y_fp = writer->declare_constant_tile(ckw::ConstantData({{
scale_y}}, ckw::DataType::Fp32));
296 const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
299 const int32_t dst_n0 = root_window.x().step();
305 const int32_t dst_n0_partial = _dst->
dimension(0) % dst_n0;
308 const int32_t dst_shift_back = (dst_n0 - dst_n0_partial) % dst_n0;
310 ckw::TensorSampler sampler_dst;
311 sampler_dst.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
312 if (dst_n0_partial == 0)
318 sampler_dst.address_mode_x(ckw::TensorSamplerAddressModeX::OverlappingMin);
322 sampler_dst.storage(ckw::TensorStorageType::BufferUint8Ptr);
325 auto tile_dst = writer->declare_tile(
"dst", ckw::TileInfo(dst_dt, 1, dst_n0));
328 writer->op_assign(tile_dst, const_0_fp);
331 dst->init_virtual_tensor(tile_dst, sampler_dst);
336 auto const_n0_i32 = writer->declare_constant_tile(ckw::ConstantData({{dst_n0}}, ckw::DataType::Int32));
337 auto const_shift_back_n0_i32 =
338 writer->declare_constant_tile(ckw::ConstantData({{dst_shift_back}}, ckw::DataType::Int32));
343 ckw::TensorSampler sampler_src;
344 sampler_src.format(ckw::TensorSamplerFormat::Dim0_Dim1_Dim2);
358 auto tile_gid_0 = writer->declare_tile(
"gid_0", ckw::TileInfo(ckw::DataType::Int32));
359 auto tile_gid_1 = writer->declare_tile(
"gid_1", ckw::TileInfo(ckw::DataType::Int32));
360 auto tile_gid_2 = writer->declare_tile(
"gid_2", ckw::TileInfo(ckw::DataType::Int32));
362 writer->op_get_global_id(tile_gid_0, 0);
363 writer->op_get_global_id(tile_gid_1, 1);
364 writer->op_get_global_id(tile_gid_2, 2);
366 auto tile_co = writer->declare_tile(
"co", ckw::TileInfo(ckw::DataType::Int32));
367 auto tile_xo = writer->declare_tile(
"xo", ckw::TileInfo(ckw::DataType::Int32));
368 auto tile_yo = writer->declare_tile(
"yo", ckw::TileInfo(ckw::DataType::Int32));
369 auto tile_bo = writer->declare_tile(
"bo", ckw::TileInfo(ckw::DataType::Int32));
374 writer->op_assign(tile_xo, tile_gid_1);
375 writer->op_binary(tile_yo, ckw::BinaryOp::Mod, tile_gid_2, const_dst_h_i32);
376 writer->op_binary(tile_bo, ckw::BinaryOp::Div, tile_gid_2, const_dst_h_i32);
381 auto tile_xi_f = writer->declare_tile(
"xi_f", ckw::DataType::Fp32);
382 auto tile_yi_f = writer->declare_tile(
"yi_f", ckw::DataType::Fp32);
391 writer->op_binary(tile_xi_f, ckw::BinaryOp::Mul, tile_xi_f, const_scale_x_fp);
392 writer->op_binary(tile_yi_f, ckw::BinaryOp::Mul, tile_yi_f, const_scale_y_fp);
398 const auto &tile_xo_plus_half = writer->declare_tile(
"xo_plus_half", ckw::DataType::Fp32);
399 const auto &tile_yo_plus_half = writer->declare_tile(
"yo_plus_half", ckw::DataType::Fp32);
403 writer->op_binary(tile_xo_plus_half, ckw::BinaryOp::Add, tile_xo_plus_half, const_pos_0_5_fp);
404 writer->op_binary(tile_yo_plus_half, ckw::BinaryOp::Add, tile_yo_plus_half, const_pos_0_5_fp);
405 writer->op_binary(tile_xi_f, ckw::BinaryOp::Mul, tile_xo_plus_half, const_scale_x_fp);
406 writer->op_binary(tile_yi_f, ckw::BinaryOp::Mul, tile_yo_plus_half, const_scale_y_fp);
408 writer->op_binary(tile_xi_f, ckw::BinaryOp::Sub, tile_xi_f, const_pos_0_5_fp);
409 writer->op_binary(tile_yi_f, ckw::BinaryOp::Sub, tile_yi_f, const_pos_0_5_fp);
418 auto tile_xi_f_floor = writer->declare_tile(
"xi_f_floor", ckw::DataType::Fp32);
419 auto tile_yi_f_floor = writer->declare_tile(
"yi_f_floor", ckw::DataType::Fp32);
420 writer->op_unary(tile_xi_f_floor, ckw::UnaryOp::Floor, tile_xi_f);
421 writer->op_unary(tile_yi_f_floor, ckw::UnaryOp::Floor, tile_yi_f);
423 auto tile_xi = writer->declare_tile(
"xi", ckw::DataType::Int32);
424 auto tile_yi = writer->declare_tile(
"yi", ckw::DataType::Int32);
432 auto tile_src_w_minus_1 = writer->declare_tile(
"src_w_minus_1", ckw::DataType::Int32);
433 auto tile_src_h_minus_1 = writer->declare_tile(
"src_h_minus_1", ckw::DataType::Int32);
434 writer->op_binary(tile_src_w_minus_1, ckw::BinaryOp::Sub, const_src_w_i32, const_pos_1_i32);
435 writer->op_binary(tile_src_h_minus_1, ckw::BinaryOp::Sub, const_src_h_i32, const_pos_1_i32);
437 auto tile_xi_plus_1 = writer->declare_tile(
"xi_plus_1", ckw::DataType::Int32);
438 auto tile_yi_plus_1 = writer->declare_tile(
"yi_plus_1", ckw::DataType::Int32);
439 writer->op_binary(tile_xi_plus_1, ckw::BinaryOp::Add, tile_xi, const_pos_1_i32);
440 writer->op_binary(tile_yi_plus_1, ckw::BinaryOp::Add, tile_yi, const_pos_1_i32);
442 auto tile_xi0 = writer->declare_tile(
"xi0", ckw::DataType::Int32);
443 auto tile_yi0 = writer->declare_tile(
"yi0", ckw::DataType::Int32);
444 auto tile_xi1 = writer->declare_tile(
"xi1", ckw::DataType::Int32);
445 auto tile_yi1 = writer->declare_tile(
"yi1", ckw::DataType::Int32);
447 writer->op_ternary(tile_xi0, ckw::TernaryOp::Clamp, tile_xi, const_0_i32, tile_src_w_minus_1);
448 writer->op_ternary(tile_yi0, ckw::TernaryOp::Clamp, tile_yi, const_0_i32, tile_src_h_minus_1);
449 writer->op_ternary(tile_xi1, ckw::TernaryOp::Clamp, tile_xi_plus_1, const_0_i32, tile_src_w_minus_1);
450 writer->op_ternary(tile_yi1, ckw::TernaryOp::Clamp, tile_yi_plus_1, const_0_i32, tile_src_h_minus_1);
452 auto tile_in00 = writer->declare_tile(
"in00", ckw::TileInfo(dst_dt, 1, dst_n0));
453 auto tile_in01 = writer->declare_tile(
"in01", ckw::TileInfo(dst_dt, 1, dst_n0));
454 auto tile_in10 = writer->declare_tile(
"in10", ckw::TileInfo(dst_dt, 1, dst_n0));
455 auto tile_in11 = writer->declare_tile(
"in11", ckw::TileInfo(dst_dt, 1, dst_n0));
457 writer->op_load(tile_in00,
src->tensor(), sampler_src, tile_co, tile_xi0, tile_yi0, tile_bo);
458 writer->op_load(tile_in01,
src->tensor(), sampler_src, tile_co, tile_xi1, tile_yi0, tile_bo);
459 writer->op_load(tile_in10,
src->tensor(), sampler_src, tile_co, tile_xi0, tile_yi1, tile_bo);
460 writer->op_load(tile_in11,
src->tensor(), sampler_src, tile_co, tile_xi1, tile_yi1, tile_bo);
463 auto tile_a = writer->declare_tile(
"a", ckw::DataType::Fp32);
464 auto tile_b = writer->declare_tile(
"b", ckw::DataType::Fp32);
465 auto tile_a1 = writer->declare_tile(
"a1", ckw::DataType::Fp32);
466 auto tile_b1 = writer->declare_tile(
"b1", ckw::DataType::Fp32);
472 auto tile_xi_float = writer->declare_tile(
"xi_float", ckw::DataType::Fp32);
473 auto tile_yi_float = writer->declare_tile(
"yi_float", ckw::DataType::Fp32);
477 writer->op_binary(tile_a, ckw::BinaryOp::Sub, tile_xi_f, tile_xi_float);
478 writer->op_binary(tile_b, ckw::BinaryOp::Sub, const_pos_1_fp, tile_a);
479 writer->op_binary(tile_a1, ckw::BinaryOp::Sub, tile_yi_f, tile_yi_float);
480 writer->op_binary(tile_b1, ckw::BinaryOp::Sub, const_pos_1_fp, tile_a1);
483 const auto &tile_a_src_type = writer->declare_tile(
"a_src_t",
to_ckw(_src->
data_type()));
484 const auto &tile_b_src_type = writer->declare_tile(
"b_src_t",
to_ckw(_src->
data_type()));
485 const auto &tile_a1_src_type = writer->declare_tile(
"a1_src_t",
to_ckw(_src->
data_type()));
486 const auto &tile_b1_src_type = writer->declare_tile(
"b1_src_t",
to_ckw(_src->
data_type()));
494 writer->op_binary(tile_in00, ckw::BinaryOp::Mul, tile_in00, tile_b_src_type);
495 writer->op_binary(tile_in00, ckw::BinaryOp::Mul, tile_in00, tile_b1_src_type);
498 writer->op_binary(tile_in01, ckw::BinaryOp::Mul, tile_in01, tile_a_src_type);
499 writer->op_binary(tile_in01, ckw::BinaryOp::Mul, tile_in01, tile_b1_src_type);
502 writer->op_binary(tile_in10, ckw::BinaryOp::Mul, tile_in10, tile_b_src_type);
503 writer->op_binary(tile_in10, ckw::BinaryOp::Mul, tile_in10, tile_a1_src_type);
506 writer->op_binary(tile_in11, ckw::BinaryOp::Mul, tile_in11, tile_a_src_type);
507 writer->op_binary(tile_in11, ckw::BinaryOp::Mul, tile_in11, tile_a1_src_type);
510 writer->op_assign(tile_dst, tile_in00);
511 writer->op_binary(tile_dst, ckw::BinaryOp::Add, tile_dst, tile_in01);
512 writer->op_binary(tile_dst, ckw::BinaryOp::Add, tile_dst, tile_in10);
513 writer->op_binary(tile_dst, ckw::BinaryOp::Add, tile_dst, tile_in11);
523 do_nearest_neighbor_resize(comp_group, vtable, writer);
526 do_bilinear_resize(comp_group, vtable, writer);
546 std::string tuner_id =
"resize_";
567 std::string
name =
"resize_";