23.08
|
Go to the documentation of this file.
29 #include "ckw/TensorTileSampler.h"
42 namespace experimental
44 namespace dynamic_fusion
50 inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_t m0, int32_t n0)
52 TensorTileSampler sampler;
54 auto &gid_0 = writer->declare_tile(
"gid_0", ckw::DataType::Int32);
55 auto &gid_1 = writer->declare_tile(
"gid_1", ckw::DataType::Int32);
56 auto &gid_2 = writer->declare_tile(
"gid_2", ckw::DataType::Int32);
58 auto &const_0 = writer->declare_tile(
"0", 0);
59 writer->op_get_global_id(gid_0, 0);
60 writer->op_get_global_id(gid_1, 1);
61 writer->op_get_global_id(gid_2, 2);
63 auto &x_coord = writer->declare_tile(
"x_coord", ckw::DataType::Int32);
64 auto &y_coord = writer->declare_tile(
"y_coord", ckw::DataType::Int32);
65 auto &m0_t = writer->declare_tile(
"m0", m0);
66 auto &n0_t = writer->declare_tile(
"n0", n0);
67 writer->op_binary_expression(x_coord, gid_0, BinaryOp::Mul, n0_t);
68 writer->op_binary_expression(y_coord, gid_1, BinaryOp::Mul, m0_t);
78 sampler.format(TensorSamplerFormat::C_WH_1);
80 sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder);
81 sampler.address_mode_z(TensorSamplerAddressModeZ::Skip);
93 _attributes{ attributes }
103 const unsigned int n0 = root_window.
x().
step();
104 const unsigned int m0 = root_window.y().step();
112 const auto sampler = create_sampler(writer, m0, n0);
117 const auto &sampler =
src->tile_sampler();
121 const auto &src_tile =
src->tile();
122 const auto &sampler =
src->tile_sampler();
131 const TileInfo src_tile_info = src_tile.tile_info();
132 const TileInfo dst_tile_info = TileInfo(target_dt, src_tile_info.height(), src_tile_info.width());
135 auto &
tile = writer->declare_tile(
"dst_tile", dst_tile_info);
136 dst->init_virtual_tensor(
tile, sampler);
139 const auto &dst_tile =
dst->tile();
144 const bool cast_down = (src_size >= dst_size);
148 const auto &constant_x80 = writer->declare_tile(
"0x80", 0x80);
149 writer->op_binary_expression(src_tile, src_tile, BinaryOp::BitwiseXOR, constant_x80);
156 convert_policy = ckw::ConvertPolicy::Saturate;
159 writer->op_cast_expression(dst_tile, src_tile, convert_policy);
Class to describe a number of elements in each dimension.
CastAttributes & convert_policy(const ConvertPolicy &policy)
Set Policy.
constexpr unsigned int vector_size_byte_opencl
SimpleTensor< float > src
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
constexpr int step() const
Return the step of the dimension.
ComponentPtr get_root_component() const
Get the root (first) component of this group.
Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator.
virtual size_t element_size() const =0
Element size in bytes, calculated as data_size() * num_channels().
The argument of a dynamic fusion component which can be either user tensor or virtual tensor.
SimpleTensor< T > tile(const SimpleTensor< T > &src, const Multiples &multiples)
Window get_window() const override
Generate the execution window for the component.
This is a generic class that packs the arguments of an operator.
ComponentId id() const
Get component id.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
A table of all the variables used in the kernel.
size_t data_size_from_type(DataType data_type)
The size in bytes of the data type.
virtual const IGpuCkwComponentDriver * ckw_component_driver() const
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
size_t total_size() const
Collapses all dimensions to a single linear total size.
virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override
Generate kernel component code.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
An interface used by GpuCkwDriver to write source code for a kernel component.
__kernel void cast_down(__global uchar *in_ptr, uint in_stride_x, uint in_step_x, uint in_stride_y, uint in_step_y, uint in_stride_z, uint in_step_z, uint in_offset_first_element_in_bytes, __global uchar *out_ptr, uint out_stride_x, uint out_step_x, uint out_stride_y, uint out_step_y, uint out_stride_z, uint out_step_z, uint out_offset_first_element_in_bytes)
This function performs a down-casting.
int32_t ComponentId
Uniquely identifies a kernel component within a workload.
virtual Window get_window() const
Generate the execution window for the component.
Describe a multidimensional execution window.
ckw::DataType to_ckw(DataType dt)
Copyright (c) 2017-2023 Arm Limited.
ConvertPolicy
Policy to handle integer overflow.
unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0)
Returns the adjusted vector size in case it is less than the input's first dimension,...
bool is_data_type_float(DataType dt)
Check if a given data type is of floating point type.
ArgumentPack< ITensorInfo > tensors() const
Get tensor arguments.
GpuCkwComponentArgument * declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage, const std::string &alias="unnamed")
Declare a kernel component variable (argument) for the corresponding tensor info.
A group of GPU kernel components to be fused together. PRECONDITIONS:
bool is_data_type_quantized(DataType dt)
Check if a given data type is of quantized type.
unsigned int num_elems_processed_per_iteration
constexpr const Dimension & x() const
Alias to access the first dimension of the window.
CastAttributes & data_type(const DataType &data_type)
Set the data type to cast to.
void op_load_once(GpuCkwComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler)
Load the user tensor into the tile of the same component argument if it has not been loaded yet.
Helper to automatically manage kernel writer ID space.