AsmGemmInfo init_assembly_metadata(const GEMMInfo &info)
{
    AsmGemmInfo asm_info;
    asm_info.reinterpret_input_as_3d = info.reinterpret_input_as_3d();
    asm_info.depth_output_gemm3d     = info.depth_output_gemm3d();
    asm_info.activation_info         = info.activation_info();

    return asm_info;
}
NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
    : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(std::make_unique<NEGEMMAssemblyDispatch>()), _ma_kernel(),
      _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false),
      _run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false)
{
}
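As a usage note (not part of this file): the two constructor arguments let several functions share a memory pool for their intermediate tensors and a common weights manager for transformed weights. A minimal sketch, assuming the BlobLifetimeManager/PoolManager/MemoryManagerOnDemand combination from the runtime; the helper name make_memory_manager is illustrative:

#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/PoolManager.h"
#include <memory>

using namespace arm_compute;

// Sketch: give NEGEMM a memory manager so _tmp_a/_tmp_b/_tmp_d can live in a shared pool.
std::shared_ptr<IMemoryManager> make_memory_manager()
{
    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
    auto pool_mgr     = std::make_shared<PoolManager>();
    return std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
}

NEGEMM gemm{ make_memory_manager() }; // weights_manager stays nullptr here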
// NEGEMM::configure(a, b, c, d, alpha, beta, gemm_info)
const AsmGemmInfo asm_info = init_assembly_metadata(gemm_info);

// A with a single row means a vector-matrix product (GEMV); alpha != 1 needs an extra scaling stage
_run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
_run_alpha_scale                  = alpha != 1.f;

// Assembly-dispatch path: C is only forwarded when it acts as a bias
const ITensor *c_to_use = is_c_bias ? c : nullptr;
_asm_glue->configure(a, b, c_to_use, d, asm_info);
// Non-assembly path: write to an intermediate tensor when a bias addition follows
ITensor *gemm_output_to_use = d;
if(_run_bias_addition)
{
    gemm_output_to_use = &_tmp_d;
    _memory_group.manage(&_tmp_d);
}

_mm_kernel = std::make_unique<NEGEMMMatrixMultiplyKernel>();

// Select between GEMV and GEMM
if(_run_vector_matrix_multiplication)
{
    // Configure the matrix multiply kernel without interleaving/transposing the inputs
    _mm_kernel->configure(a, b, gemm_output_to_use, alpha, false);
}
shape_tmp_b.set(0, b->info()->dimension(1) * transpose_w);
shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / static_cast<float>(transpose_w)));

TensorInfo info_a = a->info()->clone()->set_tensor_shape(shape_tmp_a).set_is_resizable(true);
TensorInfo info_b = b->info()->clone()->set_tensor_shape(shape_tmp_b).set_is_resizable(true);
// Manage the intermediate buffers; _tmp_b stays out of the group when B is reshaped only on the first run
_memory_group.manage(&_tmp_a);
if(!_reshape_b_only_on_first_run)
{
    _memory_group.manage(&_tmp_b);
}

int n = b->info()->dimension(0);

// Configure the interleave, transpose and matrix multiply kernels
_interleave_kernel = std::make_unique<NEGEMMInterleave4x4Kernel>();
_interleave_kernel->configure(a, &_tmp_a);

_transpose_kernel = std::make_unique<NEGEMMTranspose1xWKernel>();
_transpose_kernel->configure(b, &_tmp_b);

_mm_kernel->configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k));
if(!_reshape_b_only_on_first_run)
{
    _tmp_b.allocator()->allocate();
}
if(_run_bias_addition)
{
    _add_bias.configure(gemm_output_to_use, c, d, ConvertPolicy::SATURATE);
    _tmp_d.allocator()->allocate();
}
// Configure the beta * C matrix addition kernel and the optional activation
_ma_kernel = std::make_unique<NEGEMMMatrixAdditionKernel>();
_ma_kernel->configure(c, d, beta);
_activation_func.configure(d, nullptr, activation);
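For orientation, the configure() shown above is driven from caller code roughly as follows; the tensor shapes, names and the F32 data type in this sketch are illustrative assumptions, not taken from this file.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

// Sketch: D = alpha * A * B with small F32 matrices (M = 32, K = 64, N = 16).
void run_small_gemm()
{
    Tensor a, b, d;
    a.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::F32)); // A: 32 x 64
    b.allocator()->init(TensorInfo(TensorShape(16U, 64U), 1, DataType::F32)); // B: 64 x 16
    d.allocator()->init(TensorInfo(TensorShape(16U, 32U), 1, DataType::F32)); // D: 32 x 16

    NEGEMM gemm;
    gemm.configure(&a, &b, nullptr, &d, 1.0f, 0.0f); // no C term, alpha = 1, beta = 0

    a.allocator()->allocate();
    b.allocator()->allocate();
    d.allocator()->allocate();

    // ... fill a and b ...
    gemm.run();
}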
// NEGEMM::validate(a, b, c, output, alpha, beta, gemm_info)
if(c != nullptr && !is_c_bias)
{
    // Additional checks on C when it is added as beta * C rather than used as a bias
}

AsmGemmInfo asm_info = init_assembly_metadata(gemm_info);

const bool run_vector_matrix_multiplication = a->dimension(1) < 2;
const int  n                                = b->dimension(0);

int mult_transpose1xW_width   = 1;
int mult_interleave4x4_height = 1;

if(run_interleave_transpose)
{
    matrix_a_info = &tmp_a_info;
    matrix_b_info = &tmp_b_info;
}

if(beta != 0 && c != nullptr && !is_c_bias)
{
    // Validate the matrix addition kernel used for the beta * C term
    ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAdditionKernel::validate(c, output, beta));
}
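The validate() path above mirrors configure() but operates on ITensorInfo only, so callers can reject an unsupported configuration before allocating tensors. A hedged sketch of how it is typically queried (shapes and data type are assumptions):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include <iostream>

using namespace arm_compute;

// Sketch: check a candidate GEMM configuration without creating any tensors.
bool gemm_config_is_supported()
{
    const TensorInfo a_info(TensorShape(64U, 32U), 1, DataType::F32);
    const TensorInfo b_info(TensorShape(16U, 64U), 1, DataType::F32);
    const TensorInfo d_info(TensorShape(16U, 32U), 1, DataType::F32);

    const Status status = NEGEMM::validate(&a_info, &b_info, nullptr, &d_info, 1.0f, 0.0f);
    if(!bool(status))
    {
        std::cout << status.error_description() << std::endl;
    }
    return bool(status);
}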
// NEGEMM::run()
if(_asm_glue->is_configured())
{
    // Assembly path: run the dispatched GEMM, then rescale by alpha when _run_alpha_scale is set
    _alpha_scale_func.run();
}
else
{
    // NEON path: if(!_run_vector_matrix_multiplication) the interleave kernel runs, plus the
    // transpose kernel if(!_reshape_b_only_on_first_run); then the matrix multiply kernel and,
    // if(_run_bias_addition), the bias addition.
}
// Run the activation function when one was configured
_activation_func.run();
// NEGEMM::prepare()
const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b);
if(_asm_glue->is_configured())
{
    if(!original_b_managed_by_weights_manager)
    {
        ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
    }
    _asm_glue->prepare();
    if(!original_b_managed_by_weights_manager)
    {
        _original_b->mark_as_unused();
    }
}
else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured())
{
    // Same pattern on the NEON path: the same two if(!original_b_managed_by_weights_manager)
    // checks bracket the one-off transpose of B.
}
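Since run() calls prepare() first, the one-off work above (the assembly-side preparation, or the single transpose of B on the NEON path) happens at most once per configuration. A caller with constant B can therefore simply reuse the function across iterations; the sketch below assumes reshape_b_only_on_first_run() is true in the GEMMInfo used at configure time, and gemm/num_iterations are illustrative names.

// Sketch: B stays constant, so its reshape/pretranspose runs only on the first call.
gemm.prepare();                                // optional: do the one-off work eagerly
for(int i = 0; i < num_iterations; ++i)
{
    // ... update the contents of tensor a ...
    gemm.run();                                // subsequent calls skip the work done in prepare()
}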