24.02.1
|
Go to the documentation of this file.
39 namespace experimental
41 namespace dynamic_fusion
45 void calculate_and_init_dst_if_empty(ITensorInfo *
dst,
46 const ITensorInfo *
src,
47 const ITensorInfo *wei,
48 const DepthwiseConv2dAttributes &attributes)
50 if (
dst->total_size() == 0U)
52 const PadStrideInfo pad_stride_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
53 attributes.pad().right, attributes.pad().top, attributes.pad().bottom,
54 attributes.dimension_rounding_type());
56 const ConvolutionInfo
conv_info{pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(),
57 attributes.dilation()};
67 Status is_supported_op_helper(
const GpuWorkloadContext &
context,
68 const ITensorInfo *
src,
69 const ITensorInfo *wei,
70 const ITensorInfo *bia,
71 const ITensorInfo *
dst,
72 const DepthwiseConv2dAttributes &attributes)
76 TensorInfo dst_info_to_validate;
77 const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
81 dst_info_to_validate_ptr =
dst;
84 calculate_and_init_dst_if_empty(&dst_info_to_validate,
src, wei, attributes);
96 const CLCompileContext *cl_compile_ctx =
context.cl_compile_context();
101 const auto properties =
105 const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(),
106 attributes.pad().left, attributes.pad().right, attributes.pad().top,
111 const DWCComputeKernelInfo dwc_info =
112 t->configure(
src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
114 settings.fast_relaxed_math(
122 .export_input_to_cl_image(dwc_info.export_input_to_cl_image)
123 .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image);
125 ArgumentPack<ITensorInfo> arguments;
127 arguments.add_const_tensor(
ACL_SRC_1, wei);
128 arguments.add_const_tensor(
ACL_SRC_2, bia);
129 arguments.add_const_tensor(
ACL_DST_0, dst_info_to_validate_ptr);
151 return is_supported_op_helper(
context,
src, wei, bia,
nullptr, attributes);
174 calculate_and_init_dst_if_empty(&dst_info_to_validate,
src, wei, attributes);
186 "Operator fusion test failed. This operator cannot be fused into the workload");
189 return is_supported_op_helper(*
sketch.
gpu_context(),
src, wei, bia, &dst_info_to_validate, attributes);
205 calculate_and_init_dst_if_empty(
dst,
src, wei, attributes);
218 const auto properties =
223 attributes.
pad().left, attributes.
pad().right, attributes.
pad().top,
237 if (settings.export_input_to_cl_image())
242 if (settings.export_weights_to_cl_image())
Operator new_operator(const GpuOperatorType &operator_type, const ArgumentPack< ITensorInfo > &tensors) const
Create a new operator.
SimpleTensor< float > src
ClComponentDepthwiseConv2dSettings Settings
Settings are a set of backend-specific parameters that influence the implementation of a component.
@ Run
Run every time after the first time.
TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
Calculate the depthwise convolution output shape of a tensor.
@ NHWC
Num samples, height, width, channels.
unsigned int n0
Number of columns processed by each thread.
ITensorInfo * create_virtual_tensor()
Create a virtual (see MemoryType) tensor info and save it.
const GpuOperatorGroup & operator_group() const
Get operator group.
bool try_add_operator(const Operator &op, bool is_output=false) const
Try adding (without actually adding) an operator to the group.
static std::unique_ptr< IClDWCNativeKernelConfig > create(GPUTarget gpu)
Static method to create the ClDWCNative kernel configuration class according to the GPU target.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
A multi-input (tensors), multi-output (tensors) acyclic directed graph of gpu kernel components. Its m...
unsigned int m0
Number of rows processed by each thread.
This is a generic class that packs the arguments of an operator.
const GpuWorkloadContext * gpu_context() const
Get the gpu workload context of this sketch.
Implementation & implementation()
Get the implementation.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
bool has_valid_id() const
Check if the tensor id is valid.
@ Complex
Complex operators are operators that are not simple but are still fusable with simple ones.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
const GpuKernelComponentGraph & component_graph() const
Get component graph.
A descriptor of a workload of operators.
GPUTarget get_arch_from_target(GPUTarget target)
Helper function to get the GPU arch.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
Provide context necessary for the creation and configuration of a workload e.g.
void add_const_tensor(Id id, const T *tensor)
Add const tensor to the pack.
const Context * context() const
Get workload context.
bool export_input_to_cl_image
Export input to cl_image.
bool export_weights_to_cl_image
Export the weights to cl_image.
KernelProperties & stage(const UnitWorkloadStage &stage)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(t,...)
void update_padding_for_cl_image(ITensorInfo *tensor)
Update padding required to export the OpenCL buffer to OpenCL image2d.
Describes when a unit workload is run.
GPUTarget
Available GPU Targets.
GpuOperatorType
Contain properties common to all operator types.
GpuTarget gpu_target() const
Get GpuTarget of the context.
Store the tensor's metadata.
void add_new_component(Args &&...args)
Create a new component and add it to the component graph. Component id is automatically allocated.
DepthwiseConv2dAttributes & depth_multiplier(const uint32_t &depth_multiplier)
Set depth multiplier.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
An operator for the sole purpose of validating fusion.
Copyright (c) 2017-2024 Arm Limited.
@ F16
16-bit floating-point number
Compute descriptor used by the depthwise convolution native kernel.
DepthwiseConv2dAttributes & pad(const Padding2D &pad)
Set padding.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
static Status validate(const Properties &properties, const ArgumentPack< ITensorInfo > &tensors, const Attributes &attributes, const Settings &settings)
Validate the component.
KernelProperties Properties
static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *wei, const ITensorInfo *bia, const Attributes &attributes)
Check if the operator configuration is supported, irrespective of fusion.
Store the tensor's metadata.
Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator.
#define ARM_COMPUTE_RETURN_ERROR_MSG(...)
An error is returned with the given description.
@ F32
32-bit floating-point number
virtual bool are_values_constant() const =0
Flag indicating whether the values of the tensor are constant, meaning that they cannot change on kernel...
static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ITensorInfo *wei, const ITensorInfo *bia, const Attributes &attributes)
Check if the operator configuration is supported and if it can be fused into the workload sketch.
void add_operator(const Operator &op, bool is_output=false)
Add an operator to the group.
static ITensorInfo * create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *wei, ITensorInfo *bia, const Attributes &attributes)
Create an operator and fuse it into the workload sketch.
DepthwiseConv2dAttributes & dilation(const Size2D &dilation)
Set dilation.
#define ARM_COMPUTE_LOG_PARAMS(...)
::arm_compute::GPUTarget GpuTarget
Gpu Information such as the Gpu target (for example, G76)
DepthwiseConv2dAttributes & stride(const Size2D &stride)
Set stride.