Compute Library
 20.05
NEGEMMLowpAssemblyMatrixMultiplyCore.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
26 #include "arm_compute/core/Error.h"
33 #include "arm_compute/core/Types.h"
37 #include "support/MemorySupport.h"
38 
39 using namespace arm_compute;
40 
// Constructor: forward the (optional) shared memory manager to both the memory
// group that owns the scratch tensors _tmp_a/_tmp_b and to the assembly glue.
// The three kernel pointers stay null until configure() decides which path
// (assembly vs. generic NEON kernels) will actually be used.
42  : _memory_group(memory_manager), _asm_glue(memory_manager), _mm_kernel(nullptr), _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _tmp_a(), _tmp_b()
43 {
44 }
45 
// Configure a low-precision (integer) GEMM computing output = A * B.
// Strategy: first try the optimised assembly route via _asm_glue; if the glue
// does not end up configured, fall back to the generic NEON kernel path, which
// requires A interleaved 4x4 and B transposed 1xW into scratch tensors.
47 {
// Shape sanity checks: inner dimensions must agree and the output must match
// A's rows and B's columns.
51  ARM_COMPUTE_ERROR_ON_MSG((a)->info()->dimension(0) != (b)->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
52  ARM_COMPUTE_ERROR_ON_MSG((a)->info()->dimension(1) != (output)->info()->dimension(1), "The output matrix must have the same number of rows as the matrix A");
53  ARM_COMPUTE_ERROR_ON_MSG((b)->info()->dimension(0) != (output)->info()->dimension(0), "The output matrix must have the same number of columns as the matrix B");
54 
// Only 8-bit input types (S8 / QASYMM8 / U8) may take the assembly path;
// any other data type is a hard configuration error.
55  bool run_optimised = false;
56  switch(a->info()->data_type())
57  {
58  case DataType::S8:
59  case DataType::QASYMM8:
60  case DataType::U8:
61  {
// GEMMInfo(false, false, true): a/b are not reshaped here; reshape only on
// first run is requested from the assembly glue.
62  _asm_glue.configure(a, b, c, output, GEMMInfo(false, false, true));
// The glue may still decline (is_configured() == false), in which case we
// fall through to the generic kernel setup below.
63  run_optimised = _asm_glue.is_configured();
64  break;
65  }
66  default:
67  {
68  ARM_COMPUTE_ERROR("Datatype not supported");
69  break;
70  }
71  }
// Fallback path: build the reshape + matrix-multiply kernel pipeline.
72  if(!run_optimised)
73  {
74  // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / 4.0f) ]
75  TensorShape shape_tmp_a = a->info()->tensor_shape();
76  shape_tmp_a.set(0, a->info()->dimension(0) * 4);
77  shape_tmp_a.set(1, std::ceil(a->info()->dimension(1) / 4.f));
78 
79  // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ]
80  TensorShape shape_tmp_b = b->info()->tensor_shape();
81  shape_tmp_b.set(0, b->info()->dimension(1) * 16);
82  shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / 16.f));
83 
// Scratch tensors keep the input data types; their backing memory is handed
// to the memory group so it can be shared/recycled across functions.
84  TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type());
85  TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type());
86  _tmp_a.allocator()->init(info_a);
87  _tmp_b.allocator()->init(info_b);
88  _memory_group.manage(&_tmp_a);
89  _memory_group.manage(&_tmp_b);
90 
91  // Configure interleave kernel
92  {
93  auto k = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();
94  k->configure(a, &_tmp_a);
95  _mtx_a_reshape_kernel = std::move(k);
96  }
97 
98  // Configure transpose kernel
99  {
100  auto k = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
101  k->configure(b, &_tmp_b);
102  _mtx_b_reshape_kernel = std::move(k);
103  }
104 
105  // Configure matrix multiply kernel
106  {
107  auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
108  k->configure(&_tmp_a, &_tmp_b, output);
109  _mm_kernel = std::move(k);
110  }
111 
// Allocate the scratch tensors only after all kernels are configured, so the
// memory group knows the full set of managed tensors and their lifetimes.
112  // Allocate tensors
113  _tmp_a.allocator()->allocate();
114  _tmp_b.allocator()->allocate();
115  }
116 }
117 
// Execute whichever path configure() set up. The reshape kernels are only
// created on the fallback path, so their null-checks double as the path
// selector for the reshape steps.
119 {
// Acquire the memory group's scratch buffers for the duration of this call;
// released automatically when the scope ends.
120  MemoryGroupResourceScope scope_mg(_memory_group);
// Interleave A 4x4 (fallback path only).
121  if(_mtx_a_reshape_kernel)
122  {
123  NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY);
124  }
125 
// Transpose B 1xW (fallback path only).
126  if(_mtx_b_reshape_kernel)
127  {
128  NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
129  }
130 
// Prefer the assembly function when configure() succeeded with it; otherwise
// run the generic matrix-multiply kernel, split across threads along Y.
131  if(_asm_glue.is_configured())
132  {
133  _asm_glue.run();
134  }
135  else
136  {
137  NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY);
138  }
139 }
void run() override
Run the kernels contained in the function.
Shape of a tensor.
Definition: TensorShape.h:39
void init(const TensorAllocator &allocator, const Coordinates &coords, TensorInfo &sub_info)
Shares the same backing memory with another tensor allocator, while the tensor info might be different.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:543
SimpleTensor< float > b
Definition: DFT.cpp:157
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352
1 channel, 1 U8 per channel
virtual DataType data_type() const =0
Data type used for each element of the tensor.
void run() override
Run the kernels contained in the function.
Interface for NEON tensor.
Definition: ITensor.h:36
Copyright (c) 2017-2020 ARM Limited.
TensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Definition: Tensor.cpp:48
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Definition: MemoryGroup.h:79
1 channel, 1 U32 per channel
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number unsigned
bool is_configured() const
Was the function successfully configured ?
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456
void allocate() override
Allocate size specified by TensorInfo of CPU memory.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output)
Initialise the kernel's inputs, output.
#define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:790
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
virtual void schedule(ICPPKernel *kernel, const Hints &hints)=0
Runs the kernel in the same thread as the caller synchronously.
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:78
Store the tensor's metadata.
Definition: TensorInfo.h:45
void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, const GEMMInfo &gemm_info)
If supported create an ACL function else fallback to the arm_gemm function.
GEMM information class.
Definition: Types.h:1931
NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
signed 8-bit number
static IScheduler & get()
Access the scheduler singleton.
Definition: Scheduler.cpp:95