Compute Library
 20.05
CLSoftmaxLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "support/MemorySupport.h"

35 namespace arm_compute
36 {
37 template <bool IS_LOG>
38 CLSoftmaxLayerGeneric<IS_LOG>::CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
39  : _memory_group(std::move(memory_manager)), _max_shift_exp_sum_kernel(), _norm_kernel(), _flatten_kernel_ptr(), _reshape_kernel(), _max(), _sum(), _tmp(), _input_flattened(), _output_flattened(),
40  _needs_flattening(false)
41 {
42 }
43 
44 template <bool IS_LOG>
46 {
47  configure_reshape_input_kernel(CLKernelLibrary::get().get_compile_context(), input, output, axis);
48 }
49 
50 template <bool IS_LOG>
51 void CLSoftmaxLayerGeneric<IS_LOG>::configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t axis)
52 {
53  // Flatten the input
54  const TensorShape shape_flatten = misc::shape_calculator::compute_softmax_shape(input->info(), axis);
55 
56  // Initialize the flat input
57  _input_flattened.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));
58 
59  // If we need to flatten the input, we can use CLFlattenKernel or CLReshapeKernel
60  // If flattening on the third axes, we use CLFlattenKernel.
61  // In all other cases we have to use CLReshapeKernel
62  if(axis != 3)
63  {
64  auto reshape_kernel_ptr = support::cpp14::make_unique<CLReshapeLayerKernel>();
65  reshape_kernel_ptr->configure(compile_context, input, &_input_flattened);
66  _flatten_kernel_ptr = std::move(reshape_kernel_ptr);
67  }
68  else
69  {
70  auto flatten_kernel_ptr = support::cpp14::make_unique<CLFlattenLayerKernel>();
71  flatten_kernel_ptr->configure(compile_context, input, &_input_flattened);
72  _flatten_kernel_ptr = std::move(flatten_kernel_ptr);
73  }
74 
75  // We need to init the output tensor here. Indeed, the reshape kernel expects
76  // both tensors to be already initialized
77  auto_init_if_empty(*output->info(), *input->info()->clone());
78 }
79 
80 template <bool IS_LOG>
81 void CLSoftmaxLayerGeneric<IS_LOG>::configure(const ICLTensor *input, ICLTensor *output, float beta, size_t axis)
82 {
83  configure(CLKernelLibrary::get().get_compile_context(), input, output, beta, axis);
84 }
85 
86 template <bool IS_LOG>
87 void CLSoftmaxLayerGeneric<IS_LOG>::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta, size_t axis)
88 {
89  // Perform validation step
92 
93  // We don't need flattening only in the case the input is 2D and axis is 1
94  _needs_flattening = axis != 1;
95 
96  // If we are dealing with a 4D tensor, we will:
97  // - Flatten the input, so that we end up with a [width*height*depth] * batches 2D tensor
98  // - Execute all the pipeline (reduction + normalization) on the flattened tensor
99  // - Reshape the flattened output into the real output
100  if(_needs_flattening)
101  {
102  // Add to the memory manager _input_flattened
103  _memory_group.manage(&_input_flattened);
104 
105  // Cofigure _flatten_kernel and _input_flattened
106  configure_reshape_input_kernel(input, output, axis);
107  }
108 
109  // We want to deal with a 2D input. Either it is the flattened version of the original input (4D case)
110  // or it is the original input case (2D case)
111  const ICLTensor *input_2D = (_needs_flattening ? &_input_flattened : input);
112 
113  // Create intermediate tensors shapes
114  TensorInfo input_info = input_2D->info()->clone()->reset_padding().set_is_resizable(true);
115  DataType tmp_data_type = is_data_type_quantized_asymmetric(input_2D->info()->data_type()) ? DataType::S32 : input_2D->info()->data_type();
116  TensorInfo tensor_info_tmp(input_info.clone()->set_data_type(tmp_data_type));
117  _tmp.allocator()->init(tensor_info_tmp);
118 
119  TensorShape max_sum_shape = input_2D->info()->tensor_shape();
120  max_sum_shape.set(0, 1);
121  _max.allocator()->init(input_info.clone()->set_tensor_shape(max_sum_shape));
122  _sum.allocator()->init(input_info.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type));
123 
124  // Set GPU target to kernels
125  _max_shift_exp_sum_kernel.set_target(CLScheduler::get().target());
126 
127  // Manage intermediate buffers
128  _memory_group.manage(&_tmp);
129  _memory_group.manage(&_max);
130  _memory_group.manage(&_sum);
131 
132  SoftmaxKernelInfo softmax_info;
133  softmax_info.beta = beta;
134  softmax_info.is_log = IS_LOG;
135  softmax_info.input_data_type = input_2D->info()->data_type();
136 
137  // Configure kernels
138  _max_shift_exp_sum_kernel.configure(compile_context, input_2D, &_max, &_tmp, &_sum, softmax_info);
139 
140  if(_needs_flattening)
141  {
142  // Add to the memory manager _output_flattened
143  _memory_group.manage(&_output_flattened);
144 
145  // The normalization kernel stores the result in a flat output tensor
146  _norm_kernel.configure(compile_context, &_tmp, &_sum, &_output_flattened, softmax_info);
147 
148  // Reshape the flat output into a the requested (4D) output
149  _reshape_kernel.configure(compile_context, &_output_flattened, output);
150 
151  // Allocate the intermediate flat tensors
152  _input_flattened.allocator()->allocate();
153  _output_flattened.allocator()->allocate();
154  }
155  else
156  {
157  // Softmax 2D case
158  _norm_kernel.configure(compile_context, &_tmp, &_sum, output, softmax_info);
159  }
160 
161  // Allocate intermediate buffers
162  _tmp.allocator()->allocate();
163  _max.allocator()->allocate();
164  _sum.allocator()->allocate();
165 }
166 
167 template <bool IS_LOG>
169 {
171  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() > 4, "Only up to 4 dimensions are supported");
172  ARM_COMPUTE_UNUSED(beta);
173 
174  // Create intermediate tensor info
175  DataType tmp_data_type = is_data_type_quantized_asymmetric(input->data_type()) ? DataType::S32 : input->data_type();
176  TensorInfo tensor_info_tmp(input->clone()->set_data_type(tmp_data_type).set_is_resizable(true));
177 
178  TensorShape max_sum_shape = input->tensor_shape();
179  max_sum_shape.set(0, 1);
180  TensorInfo tensor_info_max(input->clone()->set_tensor_shape(max_sum_shape).set_is_resizable(true));
181  TensorInfo tensor_info_sum(input->clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(QuantizationInfo()).set_is_resizable(true));
182 
183  const bool needs_flattening = (axis != 1);
184 
185  if(needs_flattening)
186  {
188  TensorInfo tensor_info_flat(input->clone()->set_tensor_shape(shape_flatten).set_is_resizable(true));
189 
190  if(axis != 3)
191  {
193  }
194  else
195  {
197  }
198  }
199 
200  SoftmaxKernelInfo softmax_info;
201  softmax_info.beta = beta;
202  softmax_info.is_log = IS_LOG;
203  softmax_info.input_data_type = input->data_type();
204 
205  ARM_COMPUTE_RETURN_ON_ERROR(CLLogits1DMaxShiftExpSumKernel::validate(input, &tensor_info_max, &tensor_info_tmp, &tensor_info_sum));
206  ARM_COMPUTE_RETURN_ON_ERROR(CLLogits1DNormKernel::validate(&tensor_info_tmp, &tensor_info_sum, output, softmax_info));
207 
208  if(needs_flattening)
209  {
211  TensorInfo tensor_info_flat(input->clone()->set_tensor_shape(shape_flatten).set_is_resizable(true));
212  }
213 
214  return Status{};
215 }
216 
217 template <bool IS_LOG>
219 {
220  MemoryGroupResourceScope scope_mg(_memory_group);
221 
222  if(_needs_flattening)
223  {
224  CLScheduler::get().enqueue(*_flatten_kernel_ptr, false);
225  }
226 
227  CLScheduler::get().enqueue(_max_shift_exp_sum_kernel, false);
228  CLScheduler::get().enqueue(_norm_kernel, !_needs_flattening);
229 
230  if(_needs_flattening)
231  {
232  CLScheduler::get().enqueue(_reshape_kernel, true);
233  }
234 }
235 
236 template class CLSoftmaxLayerGeneric<false>;
237 template class CLSoftmaxLayerGeneric<true>;
238 
239 } // namespace arm_compute
void run() override
Run the kernels contained in the function.
Shape of a tensor.
Definition: TensorShape.h:39
static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta=1.0f, size_t axis=1)
Static function to check if given info will lead to a valid configuration of CLSoftmaxLayer.
void configure(const ICLTensor *input, ICLTensor *output, float beta=1.0f, size_t axis=1)
Set the input and output tensors.
CLSoftmaxLayerGeneric(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, const SoftmaxKernelInfo &info)
Static function to check if given info will lead to a valid configuration of CLLogits1DNormKernel.
static CLScheduler & get()
Access the scheduler singleton.
Definition: CLScheduler.cpp:99
float beta
A scaling factor for the exponent with default value 1.0.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis=1)
Calculate the softmax output shape of a tensor.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Status class.
Definition: Error.h:52
Copyright (c) 2017-2020 ARM Limited.
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
Definition: Helpers.inl:202
1 channel, 1 S32 per channel
Basic function to compute a SoftmaxLayer.
Quantization information.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
DataType input_data_type
Input tensor data type.
bool is_log
Flag used to perform Log Softmax operation.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLFlattenLayerKernel.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLReshapeLayerKernel.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
CLCompileContext class.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1153
void init(Format format)
Initialize the tensor info with just a format.
Definition: TensorInfo.cpp:107
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum)
Static function to check if given info will lead to a valid configuration of CLLogits1DMaxShiftExpSumKernel.
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:78
Store the tensor's metadata.
Definition: TensorInfo.h:45
Descriptor used by the softmax kernels.
DataType
Available data types.
Definition: Types.h:77