Compute Library 22.11
CpuGemm.cpp
/*
 * Copyright (c) 2021-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/cpu/operators/CpuGemm.h"

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/common/utils/Log.h"
#include "src/core/CPP/Validate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/MemoryHelpers.h"
#include "src/cpu/utils/CpuAuxTensorHandler.h"

using namespace arm_compute::experimental;
using namespace arm_compute::misc::shape_calculator;
namespace arm_compute
{
namespace cpu
{
namespace
{
cpu::AsmGemmInfo init_assembly_metadata(const GEMMInfo &info)
{
    cpu::AsmGemmInfo asm_info;
    asm_info.method                  = cpu::AsmConvMethod::Im2Col;
    asm_info.reinterpret_input_as_3d = info.reinterpret_input_as_3d();
    asm_info.depth_output_gemm3d     = info.depth_output_gemm3d();
    asm_info.activation_info         = info.activation_info();
    asm_info.fast_mode               = info.fast_math();
    asm_info.fixed_format            = info.fixed_format();
    asm_info.weight_format           = info.weight_format();

    return asm_info;
}
} // namespace
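The helper above is a straight field-by-field copy from the public GEMMInfo into the AsmGemmInfo descriptor consumed by CpuGemmAssemblyDispatch. A minimal sketch of the mapping, assuming a default-constructed GEMMInfo (the helper lives in an anonymous namespace, so this is illustrative only):

    GEMMInfo               gemm_info{}; // defaults: no reshapes, no activation, fast_math() == false
    const cpu::AsmGemmInfo asm_info = init_assembly_metadata(gemm_info);
    // asm_info.method is always cpu::AsmConvMethod::Im2Col here;
    // asm_info.fast_mode mirrors gemm_info.fast_math(), i.e. false by default.
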
void CpuGemm::configure(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, ITensorInfo *d, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, d);
    ARM_COMPUTE_ERROR_THROW_ON(CpuGemm::validate(a, b, c, d, alpha, beta, gemm_info));
    ARM_COMPUTE_LOG_PARAMS(a, b, c, d, alpha, beta, gemm_info);

    const cpu::AsmGemmInfo asm_info      = init_assembly_metadata(gemm_info);
    const bool             is_c_bias     = gemm_info.reshape_b_only_on_first_run();
    bool                   run_optimised = bool(cpu::CpuGemmAssemblyDispatch::validate(a, b, (is_c_bias) ? c : nullptr, d, asm_info)) && gemm_info.reshape_b_only_on_first_run();

    // Check if we need to reshape the matrix B only on the first run
    _is_prepared                      = false;
    _reshape_b_only_on_first_run      = gemm_info.reshape_b_only_on_first_run();
    _run_vector_matrix_multiplication = a->dimension(1) < 2;
    _run_alpha_scale                  = alpha != 1.f;
    _run_bias_addition                = c != nullptr && gemm_info.reshape_b_only_on_first_run();
    _run_addition                     = beta != 0 && c != nullptr && !gemm_info.reshape_b_only_on_first_run();
    _run_activation                   = gemm_info.activation_info().enabled() && (!run_optimised || (run_optimised && !cpu::CpuGemmAssemblyDispatch::is_activation_supported(gemm_info.activation_info())));

    if(run_optimised)
    {
        const ITensorInfo *c_to_use = is_c_bias ? c : nullptr;
        _asm_glue                   = std::make_unique<cpu::CpuGemmAssemblyDispatch>();
        _asm_glue->configure(a, b, c_to_use, d, asm_info);
        ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured());

        auto asm_mem_req           = _asm_glue->workspace();
        _aux_mem[AsmGemmWorkspace] = asm_mem_req[AsmGemmWorkspace];
        _aux_mem[Pretraspose]      = asm_mem_req[Pretraspose];

        // Scale product by alpha
        if(_run_alpha_scale)
        {
            _alpha_scale_func = std::make_unique<cpu::CpuActivation>();
            _alpha_scale_func->configure(d, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, alpha, 0.f));
        }
    }
    else
    {
        // Pick output tensor in case bias addition should be performed
        ITensorInfo *gemm_output_to_use = (_run_bias_addition) ? &_tmp_d : d;

        _mm_kernel = std::make_unique<cpu::kernels::CpuGemmMatrixMultiplyKernel>();

        // Select between GEMV and GEMM
        if(_run_vector_matrix_multiplication)
        {
            // Configure the matrix multiply kernel
            _mm_kernel->configure(a, b, gemm_output_to_use, alpha, false);
        }
        else
        {
            const int m = a->dimension(1);
            const int n = b->dimension(0);
            const int k = a->dimension(0);

            // Configure interleave kernel
            _interleave_kernel = std::make_unique<cpu::kernels::CpuGemmInterleave4x4Kernel>();
            _interleave_kernel->configure(a, &_tmp_a);
            _aux_mem[InterleavedLHS] = MemoryInfo(offset_int_vec(InterleavedLHS), MemoryLifetime::Temporary, _tmp_a.total_size());

            // Configure transpose kernel
            _transpose_kernel = std::make_unique<cpu::kernels::CpuGemmTranspose1xWKernel>();
            _transpose_kernel->configure(b, &_tmp_b);
            _aux_mem[TransposedRHS] = MemoryInfo(offset_int_vec(TransposedRHS), MemoryLifetime::Persistent, _tmp_b.total_size());

            // Configure matrix multiplication kernel
            _mm_kernel->configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k));
        }

        if(_run_bias_addition)
        {
            _add_bias = std::make_unique<cpu::CpuAdd>();
            _add_bias->configure(gemm_output_to_use, c, d, ConvertPolicy::SATURATE);
            _aux_mem[TempResult] = MemoryInfo(offset_int_vec(TempResult), MemoryLifetime::Temporary, _tmp_d.total_size());
        }
    }

    // Configure matrix addition kernel
    if(_run_addition)
    {
        _ma_kernel = std::make_unique<cpu::kernels::CpuGemmMatrixAdditionKernel>();
        _ma_kernel->configure(c, d, beta);
    }

    // Configure activation
    if(_run_activation)
    {
        _activation_func = std::make_unique<cpu::CpuActivation>();
        _activation_func->configure(d, nullptr, gemm_info.activation_info());
    }
}

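configure() above decides between two execution strategies for d = alpha * (A * B) + beta * C: the optimised assembly dispatch (CpuGemmAssemblyDispatch) when it validates successfully, or the generic path built from the interleave/transpose reshape kernels plus CpuGemmMatrixMultiplyKernel. A hedged usage sketch, assuming illustrative F32 shapes with M = 4, N = 8, K = 12 (a Compute Library TensorShape is (width, height), i.e. (columns, rows), so the M x K matrix A gets shape (K, M)):

    TensorInfo a_info(TensorShape(12U, 4U), 1, DataType::F32); // A: M x K
    TensorInfo b_info(TensorShape(8U, 12U), 1, DataType::F32); // B: K x N
    TensorInfo d_info(TensorShape(8U, 4U), 1, DataType::F32);  // D: M x N

    cpu::CpuGemm gemm;
    gemm.configure(&a_info, &b_info, nullptr, &d_info, 1.f /*alpha*/, 0.f /*beta*/, GEMMInfo());
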
Status CpuGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d, float alpha, float beta, const GEMMInfo &gemm_info)
{
    ARM_COMPUTE_UNUSED(alpha);
    const bool is_c_bias = gemm_info.reshape_b_only_on_first_run();

    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(a);
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(a);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::BFLOAT16, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(0) != b->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");
    if(a->data_type() != DataType::BFLOAT16)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, d);
    }

    if(c != nullptr && !is_c_bias)
    {
        ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.depth_output_gemm3d() != 0);
        ARM_COMPUTE_RETURN_ERROR_ON(gemm_info.reinterpret_input_as_3d());
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(c, d);
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(a->dimension(1) != c->dimension(1), "The C matrix must have the same number of rows as the matrix A");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(b->dimension(0) != c->dimension(0), "The C matrix must have the same number of columns as the matrix B");
    }

    if(d->total_size() != 0)
    {
        // For fixed format we are expecting some kind of blocked format for B/RHS so the dimension won't necessarily match the result matrix any more.
        ARM_COMPUTE_RETURN_ERROR_ON(!gemm_info.fixed_format() && b->dimension(0) != d->dimension(0));
        if(gemm_info.depth_output_gemm3d() != 0)
        {
            if(gemm_info.reinterpret_input_as_3d())
            {
                ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != d->dimension(1));
                ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(2) != d->dimension(2));
            }
            else
            {
                ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != d->dimension(1) * d->dimension(2));
            }
        }
        else
        {
            ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != d->dimension(1));
        }
    }

    // Check if we need to run the optimized assembly kernel
    cpu::AsmGemmInfo asm_info      = init_assembly_metadata(gemm_info);
    const bool       run_optimised = bool(cpu::CpuGemmAssemblyDispatch::validate(a, b, is_c_bias ? c : nullptr, d, asm_info));

    if(!run_optimised)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.reinterpret_input_as_3d(), "CpuGemm cannot reinterpret the input tensor as 3D");
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.depth_output_gemm3d() != 0, "CpuGemm cannot reinterpret the output tensor as 3D");

        // Check if the first input tensor is a vector.
        const bool run_vector_matrix_multiplication = a->dimension(1) < 2;
        // Check if we need to reshape the matrix A and matrix B
        const bool run_interleave_transpose = !run_vector_matrix_multiplication && !(gemm_info.reshape_b_only_on_first_run());

        // Arguments used by GEMMReshapeInfo
        // If we pass the matrix A and matrix B reshaped to CpuGemmMatrixMultiplyKernel, we need to pass m, n, k, mult_transpose1xW_width and mult_interleave4x4_height to GEMMReshapeInfo
        // in order to know how the matrices have been reshaped
        const int m                         = a->dimension(1);
        const int n                         = b->dimension(0);
        const int k                         = a->dimension(0);
        int       mult_transpose1xW_width   = 1;
        int       mult_interleave4x4_height = 1;

        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, gemm_info.depth_output_gemm3d());

        const ITensorInfo *matrix_a_info = a;
        const ITensorInfo *matrix_b_info = b;

        TensorInfo tmp_a_info{};
        TensorInfo tmp_b_info{};
        TensorInfo tmp_output_info = *d->clone();

        if(run_interleave_transpose)
        {
            matrix_a_info = &tmp_a_info;
            matrix_b_info = &tmp_b_info;

            // Validate interleave kernel
            auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_interleaved_shape(*a, mult_interleave4x4_height, gemm_info.reinterpret_input_as_3d())));
            ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuGemmInterleave4x4Kernel::validate(a, &tmp_a_info));

            // Validate transpose kernel
            auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_transpose1xW_with_element_size_shape(*b, mult_transpose1xW_width)));
            ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuGemmTranspose1xWKernel::validate(b, &tmp_b_info));
        }

        // Validate matrix multiply
        auto_init_if_empty(tmp_output_info, matrix_a_info->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, run_interleave_transpose, reshape_info)));
        ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuGemmMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, &tmp_output_info, alpha, run_interleave_transpose, reshape_info));

        if(c != nullptr && gemm_info.reshape_b_only_on_first_run())
        {
            ARM_COMPUTE_RETURN_ON_ERROR(cpu::CpuAdd::validate(&tmp_output_info, c, d, ConvertPolicy::SATURATE));
        }
    }

    // Validate matrix addition kernel
    if(beta != 0 && c != nullptr && !is_c_bias)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuGemmMatrixAdditionKernel::validate(c, d, beta));
    }

    // Validate activation
    const ActivationLayerInfo &activation = gemm_info.activation_info();
    if(activation.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(cpu::CpuActivation::validate(d, nullptr, activation));
    }

    return Status{};
}

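validate() is the static dry-run counterpart of configure(): it runs the same checks against tensor metadata only, so callers can reject unsupported configurations before allocating anything. A sketch of the usual validate-first pattern, reusing the illustrative infos from the configure example:

    const Status st = cpu::CpuGemm::validate(&a_info, &b_info, nullptr, &d_info, 1.f, 0.f, GEMMInfo());
    if(!bool(st))
    {
        std::cerr << st.error_description() << std::endl; // e.g. "The product AB is defined only if ..."
    }
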
void CpuGemm::run(ITensorPack &tensors)
{
    prepare(tensors);

    auto a = tensors.get_const_tensor(ACL_SRC_0);
    auto b = tensors.get_const_tensor(ACL_SRC_1);
    auto c = tensors.get_const_tensor(ACL_SRC_2);
    auto d = tensors.get_tensor(ACL_DST);

    if(_asm_glue && _asm_glue->is_configured())
    {
        // Pass c to asm dispatch only if it's the bias tensor
        ITensorPack asm_pack = tensors;
        asm_pack.add_const_tensor(ACL_SRC_2, (_reshape_b_only_on_first_run) ? c : nullptr);
        _asm_glue->run(asm_pack);
        if(_run_alpha_scale)
        {
            ITensorPack pack{ { ACL_SRC, d }, { ACL_DST, d } };
            _alpha_scale_func->run(pack);
        }
    }
    else
    {
        CpuAuxTensorHandler interleaved_a(offset_int_vec(InterleavedLHS), _tmp_a, tensors, true);
        CpuAuxTensorHandler transposed_b(offset_int_vec(TransposedRHS), _tmp_b, tensors, true);
        CpuAuxTensorHandler temp_d(offset_int_vec(TempResult), _tmp_d, tensors, true);

        ITensorPack mm_pack{ { ACL_SRC_0, a }, { ACL_SRC_1, b }, { ACL_DST, (_run_bias_addition) ? temp_d.get() : d } };
        if(!_run_vector_matrix_multiplication)
        {
            // Run interleave kernel
            ITensorPack interleave_pack{ { ACL_SRC, a }, { ACL_DST, interleaved_a.get() } };
            NEScheduler::get().schedule_op(_interleave_kernel.get(), Window::DimY, _interleave_kernel->window(), interleave_pack);

            if(!_reshape_b_only_on_first_run)
            {
                // Run transpose kernel
                ITensorPack transpose_pack{ { ACL_SRC, b }, { ACL_DST, transposed_b.get() } };
                NEScheduler::get().schedule_op(_transpose_kernel.get(), Window::DimY, _transpose_kernel->window(), transpose_pack);
            }

            // Use reshaped matrices
            mm_pack.add_const_tensor(ACL_SRC_0, interleaved_a.get());
            mm_pack.add_const_tensor(ACL_SRC_1, transposed_b.get());
        }

        NEScheduler::get().schedule_op(_mm_kernel.get(), _run_vector_matrix_multiplication ? Window::DimX : Window::DimY, _mm_kernel->window(), mm_pack);

        // Run bias addition kernel
        if(_run_bias_addition)
        {
            ITensorPack pack{ { ACL_SRC_0, temp_d.get() }, { ACL_SRC_1, c }, { ACL_DST, d } };
            _add_bias->run(pack);
        }
    }

    // Run matrix addition kernel
    if(_run_addition)
    {
        ITensorPack c_add_pack{ { ACL_SRC, c }, { ACL_DST, d } };
        NEScheduler::get().schedule_op(_ma_kernel.get(), Window::DimY, _ma_kernel->window(), c_add_pack);
    }

    // Run activation function
    if(_run_activation)
    {
        ITensorPack pack{ { ACL_SRC, d }, { ACL_DST, d } };
        _activation_func->run(pack);
    }
}

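run() pulls its operands out of the ITensorPack by the ACL_SRC_*/ACL_DST ids, so the caller owns tensor allocation and lifetime. A hedged sketch, assuming a, b and d are allocated CPU tensors matching the infos used at configure time (workspace buffers reported by gemm.workspace() would normally be bound into the same pack by the caller or a memory manager):

    ITensorPack pack{ { ACL_SRC_0, &a }, { ACL_SRC_1, &b }, { ACL_DST, &d } };
    gemm.run(pack); // the first call also triggers prepare()
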
void CpuGemm::prepare(ITensorPack &tensors)
{
    if(!_is_prepared)
    {
        if(_asm_glue && _asm_glue->is_configured())
        {
            _asm_glue->prepare(tensors);
        }
        else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication)
        {
            const ITensor *b     = tensors.get_const_tensor(ACL_SRC_1);
            ITensor       *b_aux = utils::cast::polymorphic_cast<ITensor *>(tensors.get_tensor(offset_int_vec(TransposedRHS)));
            ARM_COMPUTE_ERROR_ON_NULLPTR(b, b_aux);

            CpuAuxTensorHandler transposed_b(_tmp_b, *b_aux);
            ITensorPack         transpose_pack{ { ACL_SRC, b }, { ACL_DST, transposed_b.get() } };
            NEScheduler::get().schedule_op(_transpose_kernel.get(), Window::DimY, _transpose_kernel->window(), transpose_pack);
        }
        _is_prepared = true;
    }
}

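prepare() funnels all one-off work through a single guard: either the assembly dispatch pre-transposes its weights, or (with reshape_b_only_on_first_run() set) the Transpose1xW kernel reshapes B once into the persistent TransposedRHS buffer. In sketch form, continuing the example above:

    gemm.run(pack); // first call: pays for the one-off reshape of B via prepare()
    gemm.run(pack); // _is_prepared is now true: goes straight to the multiply
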
experimental::MemoryRequirements CpuGemm::workspace() const
{
    return _aux_mem;
}

Status CpuGemm::has_opt_impl(arm_compute::WeightFormat &expected_weight_format, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *d,
                             const GEMMInfo &gemm_info)
{
    const cpu::AsmGemmInfo asm_info = init_assembly_metadata(gemm_info);

    return CpuGemmAssemblyDispatch::has_opt_impl(expected_weight_format, a, b, c, d, asm_info);
}

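has_opt_impl() lets callers ask the assembly backend up front which weight layout an optimised kernel would want, which matters for the fixed-format (variable weights) path probed by isVarWeightsKernel() below. A hedged sketch, assuming the GEMMInfo setters for fixed-format mode available in this release and reusing the illustrative infos from above:

    arm_compute::WeightFormat expected_wf = arm_compute::WeightFormat::ANY;
    GEMMInfo                  fixed_info{};
    fixed_info.set_fixed_format(true);
    fixed_info.set_weight_format(arm_compute::WeightFormat::ANY);
    const Status ok = cpu::CpuGemm::has_opt_impl(expected_wf, &a_info, &b_info, nullptr, &d_info, fixed_info);
    // On success, expected_wf names the concrete blocked layout to pre-arrange B into.
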
bool CpuGemm::isVarWeightsKernel() const
{
    return _asm_glue && _asm_glue->isVarWeightsKernel();
}
} // namespace cpu
} // namespace arm_compute