76 : _memory_group(
std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _original_b(nullptr), _is_interleaved_transposed(false),
77 _run_addition(false), _reshape_b_only_on_first_run(false), _is_prepared(false)
109 int mult_transpose1xW_width = 1;
110 int mult_interleave4x4_height = 1;
115 if(_is_interleaved_transposed)
121 _memory_group.
manage(&_tmp_a);
122 if(!_reshape_b_only_on_first_run)
124 _memory_group.
manage(&_tmp_b);
129 _interleave_kernel.
configure(a, &_tmp_a);
135 _mm_kernel.
configure(matrix_a, matrix_b, output, alpha, _is_interleaved_transposed,
GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height));
137 if(_is_interleaved_transposed)
141 if(!_reshape_b_only_on_first_run)
148 if(beta != 0 && c !=
nullptr)
151 _run_addition =
true;
167 if(_is_interleaved_transposed)
172 if(!_reshape_b_only_on_first_run)
196 if(_is_interleaved_transposed && _reshape_b_only_on_first_run)
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
void dispatch(IGCKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
void run() override
Run the kernels contained in the function.
GEMM reshape information class.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
bool is_used() const
Flags if the tensor is used or not.
1 channel, 1 F32 per channel
void memory_barrier()
Defines a barrier ordering memory transactions.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
Store the tensor's metadata.
Interface for GLES Compute tensor.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Copyright (c) 2017-2021 Arm Limited.
1 channel, 1 F16 per channel
void set_target(GPUTarget target)
Set the targeted GPU architecture.
void mark_as_unused() const
Marks a tensor as unused.
void manage(IMemoryManageable *obj) override
Sets an object to be managed by the given memory group.
static GCScheduler & get()
Access the scheduler singleton.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
void configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed=true, const GEMMReshapeInfo &reshape_info=GEMMReshapeInfo())
Initialise the kernel's input, output and alpha.
GCGEMM(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Default constructor.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const IGCTensor *c, const ITensorInfo *output, const float alpha, const float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of GCGEMM.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void prepare() override
Prepare the function for executing.
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
virtual void allocate()=0
Interface to be implemented by the child class to allocate the tensor.
Memory group resources scope handling class.
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.
GPUTarget
Available GPU Targets.
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
Interface to enqueue GLES kernels and get/set the GLES CommandQueue.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
bool reshape_b_only_on_first_run() const
Flag which specifies if the reshape of matrix B should be executed only for the first run.
void configure(const IGCTensor *input, IGCTensor *output, float beta)
Initialise the kernel's input, output and beta value.
GPUTarget get_target() const
Get the target GPU.
void configure(const IGCTensor *input, IGCTensor *output)
Initialise the kernel's input and output.
void configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *c, IGCTensor *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs and output.
ITensorAllocator * allocator()
Return a pointer to the tensor's allocator.
void configure(const IGCTensor *input, IGCTensor *output)
Initialise the kernel's input and output.