24.04
|
Go to the documentation of this file.
40 namespace experimental
42 namespace dynamic_fusion
46 void calculate_and_init_dst_if_empty(ITensorInfo *
dst,
47 const ITensorInfo *lhs,
48 const ITensorInfo *rhs,
49 const MatMulAttributes &attributes,
50 const GpuMatMulSettings &settings)
54 if (
dst->total_size() == 0U)
57 lhs->tensor_shape(), rhs->tensor_shape(),
58 MatMulKernelInfo(attributes.adj_lhs(), attributes.adj_rhs(), settings.m0(), settings.n0(), settings.k0()));
67 Status is_supported_op_helper(
const GpuWorkloadContext &
context,
68 const ITensorInfo *lhs,
69 const ITensorInfo *rhs,
70 const ITensorInfo *
dst,
71 const MatMulAttributes &attributes,
72 const GpuMatMulSettings &settings)
76 TensorInfo dst_info_to_validate;
77 const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
81 dst_info_to_validate_ptr =
dst;
84 calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs, attributes, settings);
93 const auto cl_compile_ctx =
context.cl_compile_context();
97 const auto properties =
100 ArgumentPack<ITensorInfo> arguments;
101 arguments.add_const_tensor(
ACL_SRC_0, lhs);
102 arguments.add_const_tensor(
ACL_SRC_1, rhs);
103 arguments.add_const_tensor(
ACL_DST_0, dst_info_to_validate_ptr);
157 return is_supported_op_helper(
context, lhs, rhs,
nullptr, attributes, settings);
175 calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs, attributes, settings);
185 "Operator fusion test failed. This operator cannot be fused into the workload");
188 return is_supported_op_helper(*
sketch.
gpu_context(), lhs, rhs, &dst_info_to_validate, attributes, settings);
206 calculate_and_init_dst_if_empty(
dst, lhs, rhs, attributes, settings);
221 comp_graph.add_new_component<
ClComponentMatMul>(properties, arguments, attributes, settings);
Operator new_operator(const GpuOperatorType &operator_type, const ArgumentPack< ITensorInfo > &tensors) const
Create a new operator.
static ITensorInfo * create_op(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs, const Attributes &attributes, const Settings &settings)
TensorShape compute_matmul_shape(const TensorShape &input0, const TensorShape &input1, const MatMulKernelInfo &matmul_info)
Calculate the matrix multiplication output shape of two tensors.
@ Run
Run every time after the first time.
ITensorInfo * create_virtual_tensor()
Create a virtual (see MemoryType) tensor info and save it.
const GpuOperatorGroup & operator_group() const
Get operator group.
bool try_add_operator(const Operator &op, bool is_output=false) const
Try adding (without actually adding) an operator to the group.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator.
int m0() const
Get M0: number of rows processed by each work-item.
This is a generic class that packs the arguments of an operator.
const GpuWorkloadContext * gpu_context() const
Get the gpu workload context of this sketch.
Implementation & implementation()
Get the implementation.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Operator backend specific settings.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
bool has_valid_id() const
Check if the tensor id is valid.
@ Complex
Complex operators are operators that are not simple but are still fusable with simple ones.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
const GpuKernelComponentGraph & component_graph() const
Get component graph.
A descriptor of a workload of operators.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
Provide context necessary for the creation and configuration of a workload.
void add_const_tensor(Id id, const T *tensor)
Add const tensor to the pack.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
static Status validate(const Properties &properties, const ArgumentPack< ITensorInfo > &tensors, const Attributes &attributes, const Settings &settings)
Validate the component.
const Context * context() const
Get workload context.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
KernelProperties & stage(const UnitWorkloadStage &stage)
Describes when a unit workload is run.
static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs, const Attributes &attributes, const Settings &settings)
int k0() const
Get K0: number of inner accumulations.
GpuOperatorType
Contains properties common to all operator types.
Store the tensor's metadata.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Copyright (c) 2017-2024 Arm Limited.
@ F16
16-bit floating-point number
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs, const Attributes &attributes, const Settings &settings)
KernelProperties Properties
Store the tensor's metadata.
#define ARM_COMPUTE_RETURN_ERROR_MSG(...)
An error is returned with the given description.
@ F32
32-bit floating-point number
void add_operator(const Operator &op, bool is_output=false)
Add an operator to the group.
#define ARM_COMPUTE_LOG_PARAMS(...)
int n0() const
Get N0: number of columns processed by each work-item.