#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION

namespace experimental
{
namespace dynamic_fusion
{
Status add_kernel_tensor(ClKernelGraph &k_graph, const OperatorGraph::Implementation &op_graph, const OpTensorContent &op_tensor,
                         MemoryType memory_type, AuxMemoryInfo memory_info, DependencyGraph::Id &id)
{
    return k_graph.add_kernel_tensor(op_tensor.desc, memory_type, memory_info, id, op_tensor.id);
}

Status add_kernel_tensor(ClKernelGraph &k_graph, const OperatorGraph::Implementation &op_graph, const OpTensorContent &op_tensor,
                         DependencyGraph::Id &id)
{
    // Tensors at the boundary of the operator graph are core (workload) tensors; all other,
    // intermediate tensors are given auxiliary memory sized to the tensor's total size.
    bool is_src_tensor_of_graph = is_in(op_tensor.id, op_graph.graph.src_tensors());
    bool is_dst_tensor_of_graph = is_in(op_tensor.id, op_graph.graph.dst_tensors());
    MemoryType    memory_type{};
    AuxMemoryInfo memory_info{};
    if(is_src_tensor_of_graph || is_dst_tensor_of_graph)
    {
        // ... core (workload) tensor case ...
    }
    else
    {
        // ... auxiliary (intermediate) tensor case ...
        memory_info.size = op_tensor.desc->total_size();
    }
    return add_kernel_tensor(k_graph, op_graph, op_tensor, memory_type, memory_info, id);
}
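As an aside, the boundary-vs-intermediate decision above can be sketched in a few lines of standalone C++. This is only an illustration, not the library code: MemoryType and classify() below are hypothetical stand-ins, and only is_in() mirrors a helper that actually appears in the listing.

#include <algorithm>
#include <vector>

enum class MemoryType { Core, Auxiliary }; // hypothetical stand-in for the library's MemoryType

template <typename T>
bool is_in(const T &v, const std::vector<T> &vec)
{
    return std::find(vec.begin(), vec.end(), v) != vec.end();
}

// A tensor that is a source or destination of the whole operator graph is a core (workload) tensor;
// any other tensor is an intermediate and is backed by auxiliary memory.
MemoryType classify(int tensor_id, const std::vector<int> &graph_srcs, const std::vector<int> &graph_dsts)
{
    const bool is_boundary = is_in(tensor_id, graph_srcs) || is_in(tensor_id, graph_dsts);
    return is_boundary ? MemoryType::Core : MemoryType::Auxiliary;
}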

bool operator==(const OpTensor &t0, const OpTensor &t1)
{
    return std::make_tuple(t0.id()) == std::make_tuple(t1.id());
}

// ... (comparison for a descriptor with no members to compare)
{
    return std::make_tuple() == std::make_tuple();
}

bool Conv2dContent::operator==(const OperatorContent &other) const
{
    const auto converted = *utils::cast::polymorphic_downcast<const Conv2dContent *>(&other);
    return desc == converted.desc;
}

bool AddContent::operator==(const OperatorContent &other) const
{
    const auto converted = *utils::cast::polymorphic_downcast<const AddContent *>(&other);
    return desc == converted.desc;
}
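The std::make_tuple comparisons above are the usual idiom for member-wise equality: pack the members of both operands into tuples and let the tuple's operator== do the element-by-element comparison. A minimal, self-contained sketch of the same idiom (the Pad2d struct here is made up for the example, not a library type):

#include <tuple>

struct Pad2d
{
    unsigned int top{ 0 }, bottom{ 0 }, left{ 0 }, right{ 0 };
};

// Compare every member in one expression; adding a member only requires extending both tuples.
inline bool operator==(const Pad2d &a, const Pad2d &b)
{
    return std::make_tuple(a.top, a.bottom, a.left, a.right) == std::make_tuple(b.top, b.bottom, b.left, b.right);
}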

// Replicate the heuristics of ClConv2d::get_convolution_method() for the configurations supported here.
ConvolutionMethod Conv2dContent::select_conv_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dDescriptor &conv2d_desc, const GPUTarget gpu_target)
{
    // ...
    using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>;
    using ConfigurationMethod      = std::pair<ConvolutionConfiguration, ConvolutionMethod>;

    // Known configurations for which the preferred convolution method has been determined up front.
    const std::vector<ConfigurationMethod> known_configs =
    {
        ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
    };

    const auto find_config = [&](ConfigurationMethod c)
    {
        const ConvolutionConfiguration config = c.first;
        // ...
        return /* ... */ && info.pad_bottom() == legacy_pad_stride.pad_bottom() && info.pad_left() == legacy_pad_stride.pad_left()
               && info.stride() == legacy_pad_stride.stride() && (data_layout == src->data_layout());
    };

    std::vector<ConfigurationMethod>::const_iterator found;
    if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())
    {
        return (*found).second;
    }
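The table lookup itself is just std::find_if over (configuration, method) pairs, falling back to a general heuristic when nothing matches. A self-contained sketch of that pattern follows, with deliberately simplified stand-in types (the real predicate matches source and kernel sizes, padding, stride and data layout):

#include <algorithm>
#include <utility>
#include <vector>

enum class ConvMethod { Direct, Gemm, Unknown }; // stand-in for the library's ConvolutionMethod

struct ConvConfig // stand-in: only spatial sizes, no padding/stride/layout
{
    unsigned int src_w, src_h, kernel_w, kernel_h;
};

inline bool operator==(const ConvConfig &a, const ConvConfig &b)
{
    return a.src_w == b.src_w && a.src_h == b.src_h && a.kernel_w == b.kernel_w && a.kernel_h == b.kernel_h;
}

ConvMethod select_method(const ConvConfig &query)
{
    // Table of configurations whose preferred method is known a priori.
    static const std::vector<std::pair<ConvConfig, ConvMethod>> known_configs =
    {
        { { 27U, 27U, 5U, 5U }, ConvMethod::Direct },
        { { 224U, 224U, 3U, 3U }, ConvMethod::Gemm },
    };
    const auto found = std::find_if(known_configs.begin(), known_configs.end(),
                                    [&](const std::pair<ConvConfig, ConvMethod> &c) { return c.first == query; });
    // Not in the table: defer to a general heuristic (Unknown stands in for that fallback here).
    return found != known_configs.end() ? found->second : ConvMethod::Unknown;
}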

    // General heuristic based on kernel size, channel counts and overall workload.
    const size_t kernel_sz_direct_conv_thr = get_direct_conv_kernel_threshold_nhwc(gpu_target);
    // ...
    const bool is_large_kernel_sz = (weights->dimension(idx_w) >= kernel_sz_direct_conv_thr) && (weights->dimension(idx_h) >= kernel_sz_direct_conv_thr);
    const bool is_ifm_ge_16       = src->dimension(idx_c) >= 16;
    const bool is_ofm_lte_8       = weights->dimension(3U) <= 8;
    const bool workload_gte_8192  = (output_shape[0] * output_shape[1] * output_shape[2]) / 16 >= 8192;
    // ...
    if(is_large_kernel_sz && is_ifm_ge_16 && is_ifm_gt_ofm)
    {
        // ...
    }
    // ...
    if((is_large_kernel_sz && workload_gte_8192 && is_ifm_ge_16) || (is_ofm_lte_8 && is_ifm_ge_16))
    {
        // ...
    }
    // ...
}

Status Conv2dContent::translate(ClKernelGraph &kernel_graph) const
{
    // ...
    return translate_direct_conv2d(kernel_graph);
    // ...
}

Status Conv2dContent::translate_direct_conv2d(ClKernelGraph &kernel_graph) const
{
    // Register the operator's tensors with the kernel graph before adding the kernel itself.
    // ...
    auto st = add_kernel_tensor(kernel_graph, *_graph, *input, input_id);
    // ...
    st = add_kernel_tensor(kernel_graph, *_graph, *weight, weight_id);
    // ...
    st = add_kernel_tensor(kernel_graph, *_graph, *bias, bias_id);
    // ...
    st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id);
    // ...
}

Status AddContent::translate(ClKernelGraph &kernel_graph) const
{
    // ...
    auto st = add_kernel_tensor(kernel_graph, *_graph, *lhs, lhs_id);
    // ...
    st = add_kernel_tensor(kernel_graph, *_graph, *rhs, rhs_id);
    // ...
    st = add_kernel_tensor(kernel_graph, *_graph, *dst, dst_id);
    // ...
}

std::vector<const OperatorContent *> traverse(const OperatorGraph::Implementation &graph)
{
    std::vector<const OperatorContent *> ops;
    const auto sorted = graph.graph.topological_sort();
    for(const auto &pack : sorted.second)
    {
        // ...
    }
    // ...
}

std::vector<OperatorContent *> traverse(OperatorGraph::Implementation &graph)
{
    std::vector<OperatorContent *> ops;
    const auto sorted = graph.graph.topological_sort();
    for(const auto &pack : sorted.second)
    {
        // ...
    }
    // ...
}

Status translate(ClKernelGraph &kernel_graph, const OperatorGraph::Implementation &op_graph)
{
    // Visit the operators in topological order so that every producer is translated before its consumers.
    for(const auto &op : traverse(op_graph))
    {
        const auto st = op->translate(kernel_graph);
        // ...
    }
    // ...
}
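The overall translation is a topologically ordered walk in which each operator appends its kernel and tensors to the kernel graph, stopping at the first error. A standalone sketch of that control flow, using made-up Status, KernelGraph and Op types rather than the library's:

#include <string>
#include <vector>

// Minimal stand-ins: a Status that either succeeds or carries an error message,
// and operators that each know how to translate themselves into a kernel graph.
struct Status
{
    bool        ok{ true };
    std::string error{};
};

struct KernelGraph
{
    std::vector<std::string> kernels;
};

struct Op
{
    std::string name;
    Status translate(KernelGraph &kg) const
    {
        kg.kernels.push_back(name); // the real code adds a configured kernel plus its tensors
        return Status{};
    }
};

// Visit operators in an order where every producer precedes its consumers and stop at the first failure,
// mirroring the traverse() + per-operator translate() loop above.
Status translate_all(KernelGraph &kg, const std::vector<Op> &topologically_sorted_ops)
{
    for(const auto &op : topologically_sorted_ops)
    {
        const auto st = op.translate(kg);
        if(!st.ok)
        {
            return st;
        }
    }
    return Status{};
}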