Compute Library
 20.05
NESoftmaxLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
30 #include "utils/TypePrinter.h"
31 
32 #include <cfloat>
33 
34 namespace arm_compute
35 {
// NOTE(review): this file is a doxygen-HTML extraction of NESoftmaxLayer.cpp.
// Each code line still carries its original source line number as a prefix and
// a few hyperlinked lines were dropped by the extraction, so the text is not
// compilable as-is; comments below flag the dropped lines where they occur.
//
// Constructor: hands the (optional) memory manager to the function's memory
// group and default-constructs every kernel and intermediate tensor member.
// Nothing is configured or allocated here -- that happens in configure();
// _needs_flattening starts out false and _flat_or_reshape_kernel_ptr null.
36 template <bool IS_LOG>
37 NESoftmaxLayerGeneric<IS_LOG>::NESoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
38  : _memory_group(std::move(memory_manager)), _max_kernel(), _softmax_kernel(), _flat_or_reshape_kernel_ptr(nullptr), _fill_border_kernel(), _reshape_kernel(), _max(), _tmp(), _input_flattened(),
39  _output_flattened(), _needs_flattening(false)
40 {
41 }
42 
// Helper for configure(): prepares the kernel that turns the original input
// into the 2D flattened view (_input_flattened) the softmax pipeline operates on.
// NOTE(review): the function signature (original line 44) was dropped by the
// doxygen extraction; based on the call site in configure() it takes
// (input, output, axis) -- confirm against the class header.
43 template <bool IS_LOG>
45 {
46  // Flatten the input
// NOTE(review): the statement computing 'shape_flatten' (original line 47) was
// dropped by the extraction; presumably it called compute_softmax_shape()
// (listed in the cross-references below) -- TODO confirm against the source.
48 
49  // Initialize the flat input
50  _input_flattened.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));
51 
52  // If we need to flatten the input, we can use NEFlattenKernel or NEReshapeKernel
53  // If flattening on the third axes, we use NEFlattenKernel.
54  // In all other cases we have to use NEReshapeKernel
55  if(axis != 3)
56  {
57  auto reshape_kernel_ptr = support::cpp14::make_unique<NEReshapeLayerKernel>();
58  reshape_kernel_ptr->configure(input, &_input_flattened);
59  _flat_or_reshape_kernel_ptr = std::move(reshape_kernel_ptr);
60  }
61  else
62  {
63  auto flatten_kernel_ptr = support::cpp14::make_unique<NEFlattenLayerKernel>();
64  flatten_kernel_ptr->configure(input, &_input_flattened);
65  _flat_or_reshape_kernel_ptr = std::move(flatten_kernel_ptr);
66  }
67 
68  // We need to init the output tensor here. Indeed, the reshape kernel expects
69  // both tensors to be already initialized
70  auto_init_if_empty(*output->info(), *input->info()->clone());
71 }
72 
// Builds the (log-)softmax pipeline: a 1D max-reduction kernel followed by the
// softmax/normalization kernel, with an optional flatten/reshape pair wrapped
// around them whenever the reduction axis is not 1. Per the documented API
// (cross-reference list below) the signature is
//   configure(ITensor *input, ITensor *output, float beta = 1.0f, int32_t axis = -1).
// NOTE(review): the signature line (original line 74) and the validation calls
// (original lines 77-78) were dropped by the doxygen extraction.
73 template <bool IS_LOG>
75 {
76  // Perform validation step
79 
80  // Handle negative axis, negative index is used to specify axis from the end (e.g. -1 for the last axis).
81  axis = wrap_around(axis, static_cast<int32_t>(input->info()->num_dimensions()));
82 
83  // We don't need flattening only in the case the input is 2D and axis is 1
84  _needs_flattening = axis != 1;
85 
86  // If we are dealing with a 4D tensor, we will:
87  // - Flatten the input, so that we end up with a [width*height*depth] * batches 2D tensor
88  // - Execute all the pipeline (reduction + normalization) on the flattened tensor
89  // - Reshape the flattened output into the real output
90  if(_needs_flattening)
91  {
92  // Add to the memory manager _input_flattened
93  _memory_group.manage(&_input_flattened);
94 
95  // Configure _flatten_kernel and _input_flattened
96  configure_reshape_input_kernel(input, output, axis);
97  }
98 
99  // We want to deal with a 2D input. Either it is the flattened version of the original input (4D case)
100  // or it is the original input case (2D case)
101  ITensor *input_2D = (_needs_flattening ? &_input_flattened : input);
102 
103  // Create intermediate tensors shapes
104  const TensorInfo input_info = input_2D->info()->clone()->reset_padding().set_is_resizable(true);
// For quantized-asymmetric inputs the intermediate _tmp tensor is kept in F32
// so the exponentials are accumulated in floating point; otherwise it mirrors
// the input's data type.
105  DataType tmp_data_type = is_data_type_quantized_asymmetric(input_2D->info()->data_type()) ? DataType::F32 : input_2D->info()->data_type();
106  TensorInfo tensor_info_tmp(input_info.clone()->set_data_type(tmp_data_type));
107 
108  // Init intermediate tensors
// _max collapses dimension 0 to size 1: one maximum per row of the 2D view.
109  TensorShape max_sum_shape = input_2D->info()->tensor_shape();
110  max_sum_shape.set(0, 1);
111  _max.allocator()->init(input_info.clone()->set_tensor_shape(max_sum_shape));
112  _tmp.allocator()->init(tensor_info_tmp);
113 
114  // Manage intermediate buffers
115  _memory_group.manage(&_max);
116  _memory_group.manage(&_tmp);
117 
118  // Configure Kernels
119  _max_kernel.configure(input_2D, &_max);
120  if(_needs_flattening)
121  {
122  // Add to the memory manager _output_flattened
123  _memory_group.manage(&_output_flattened);
124 
125  // The normalization kernel stores the result in a flat output tensor
126  _softmax_kernel.configure(input_2D, &_max, &_output_flattened, beta, &_tmp);
127  _input_flattened.allocator()->allocate();
128 
129  // Reshape the flat output into the requested (4D) output
130  _reshape_kernel.configure(&_output_flattened, output);
131 
132  // Allocate the intermediate flat tensors
133  _output_flattened.allocator()->allocate();
134  }
135  else
136  {
137  // Softmax 2D case
// NOTE(review): _fill_border_kernel is only configured on this (non-flattening)
// path, yet run() schedules it unconditionally -- verify the scheduler
// tolerates the unconfigured kernel in the flattening case.
138  _fill_border_kernel.configure(input_2D, _max_kernel.border_size(), BorderMode::REPLICATE);
139  _softmax_kernel.configure(input_2D, &_max, output, beta, &_tmp);
140  }
141 
142  // Allocate intermediate buffers
143  _max.allocator()->allocate();
144  _tmp.allocator()->allocate();
145 }
146 
// Static shape/type validation mirroring configure(), returning a Status
// instead of configuring kernels. Per the documented API the signature is
//   static Status validate(const ITensorInfo *input, const ITensorInfo *output,
//                          float beta = 1.0f, int32_t axis = -1).
// NOTE(review): the doxygen extraction dropped original lines 148 (signature),
// 151 (presumably a null-pointer check), 172 (the 'shape_flatten' computation),
// 177/181 (the NEReshapeLayerKernel / NEFlattenLayerKernel validate calls) and
// 185 (presumably the NELogits1DMaxKernel validate call) -- the cross-reference
// list below shows the matching static validate() declarations. Confirm
// against the original source.
147 template <bool IS_LOG>
149 {
150  // Perform validation step
// Inputs with more than 4 dimensions are rejected up front.
152  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() > 4, "Only up to 4 dimensions are supported");
153  ARM_COMPUTE_UNUSED(beta);
// axis must lie in [-num_dimensions, num_dimensions).
154  ARM_COMPUTE_RETURN_ERROR_ON(axis < static_cast<int32_t>(-input->num_dimensions()) || static_cast<int32_t>(input->num_dimensions()) <= axis);
155 
156  // Handle negative axis, negative index is used to specify axis from the end (e.g. -1 for the last axis).
157  axis = wrap_around(axis, static_cast<int32_t>(input->num_dimensions()));
158 
159  // Create intermediate tensor info
160  DataType tmp_data_type = input->data_type();
161  const TensorInfo tensor_info_tmp(input->clone()->set_data_type(tmp_data_type).set_is_resizable(true));
162 
// Max/sum tensor: same shape as the input with dimension 0 collapsed to 1.
163  TensorShape max_sum_shape = input->tensor_shape();
164  max_sum_shape.set(0, 1);
165  const TensorInfo tensor_info_max_sum(input->clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(input->quantization_info()).set_is_resizable(true));
// Placeholder info passed where the softmax kernel's output argument is unused.
166  const TensorInfo dont_care;
167 
// Mirrors configure(): only the 2D, axis == 1 case skips flattening.
168  const bool needs_flattening = (axis != 1);
169 
170  if(needs_flattening)
171  {
173  TensorInfo tensor_info_flat(input->clone()->set_tensor_shape(shape_flatten).set_is_resizable(true));
174 
175  if(axis != 3)
176  {
178  }
179  else
180  {
182  }
183  }
184 
186  ARM_COMPUTE_RETURN_ON_ERROR(NELogits1DSoftmaxKernel<IS_LOG>::validate(&tensor_info_tmp, &tensor_info_max_sum, output, beta, &dont_care));
187 
188  return Status{};
189 }
190 
// Executes the configured pipeline: optional flatten/reshape of the input,
// border fill, max reduction, softmax normalization, then (when flattening was
// needed) a reshape of the flat result into the real output.
// NOTE(review): the signature line (original line 192) was dropped by the
// doxygen extraction; per the documented API it is 'void run() override'.
191 template <bool IS_LOG>
193 {
// Acquire managed intermediate buffers for the duration of this call.
194  MemoryGroupResourceScope scope_mg(_memory_group);
195 
196  if(_needs_flattening)
197  {
198  NEScheduler::get().schedule(_flat_or_reshape_kernel_ptr.get(), Window::DimY);
199  }
200 
// NOTE(review): _fill_border_kernel is scheduled on both paths but configure()
// only configures it in the non-flattening case -- confirm this is benign.
201  NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY);
202  NEScheduler::get().schedule(&_max_kernel, Window::DimY);
203  NEScheduler::get().schedule(&_softmax_kernel, Window::DimY);
204 
205  if(_needs_flattening)
206  {
207  NEScheduler::get().schedule(&_reshape_kernel, Window::DimY);
208  }
209 }
210 
// Explicit instantiations: IS_LOG == false gives NESoftmaxLayer, IS_LOG == true
// gives the log-softmax variant.
211 template class NESoftmaxLayerGeneric<false>;
212 template class NESoftmaxLayerGeneric<true>;
213 
214 } // namespace arm_compute
Shape of a tensor.
Definition: TensorShape.h:39
void configure(ITensor *input, ITensor *output, float beta=1.0f, int32_t axis=-1)
Set the input and output tensors.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis=1)
Calculate the softmax output shape of a tensor.
1 channel, 1 F32 per channel
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Status class.
Definition: Error.h:52
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
Interface for NEON tensor.
Definition: ITensor.h:36
Copyright (c) 2017-2020 ARM Limited.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
Definition: Helpers.inl:202
Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer.
T wrap_around(T x, T m)
Wrap-around a number within the range 0 <= x < m.
Definition: Helpers.h:799
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta=1.0f, int32_t axis=-1)
Static function to check if given info will lead to a valid configuration of NESoftmaxLayer.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1153
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
virtual void schedule(ICPPKernel *kernel, const Hints &hints)=0
Runs the kernel in the same thread as the caller synchronously.
Interface for softmax computation for QASYMM8 with pre-computed max.
Pixels outside the image are assumed to have the same value as the closest image pixel.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEReshapeLayerKernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:78
Store the tensor's metadata.
Definition: TensorInfo.h:45
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NELogits1DMaxKernel.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of NEFlattenLayerKernel.
DataType
Available data types.
Definition: Types.h:77
NESoftmaxLayerGeneric(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
static IScheduler & get()
Access the scheduler singleton.
Definition: Scheduler.cpp:95