Compute Library
 21.02
CLSoftmaxLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
25 
28 #include "arm_compute/core/Types.h"
29 #include "arm_compute/core/Utils.h"
32 #include "src/core/CL/ICLKernel.h"
36 
37 namespace arm_compute
38 {
// Constructor: wires the optional memory manager into the function's memory
// group and default-constructs all members. The two CL kernels are heap-owned
// (unique_ptr) so the kernel types can stay incomplete in the public header.
// NOTE: the init order here must match the member declaration order — verify
// against the header if reordering.
template <bool IS_LOG>
CLSoftmaxLayerGeneric<IS_LOG>::CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),      // takes ownership of the (possibly null) manager
      _permute_input(),                              // permutes input when softmax axis != 0
      _permute_output(),                             // permutes result back to the caller's layout
      _max_shift_exp_sum_kernel(std::make_unique<CLLogits1DMaxShiftExpSumKernel>()),
      _norm_kernel(std::make_unique<CLLogits1DNormKernel>()),
      _max(),             // per-row maxima (intermediate)
      _sum(),             // per-row exponential sums (intermediate)
      _tmp(),             // shifted exponentials (intermediate)
      _input_permuted(),  // scratch tensor used only when a permute is needed
      _output_permuted(), // scratch tensor used only when a permute is needed
      _needs_permute()
{
}
54 
55 template <bool IS_LOG>
57 
// Convenience overload: forwards to the full configure() using the default
// compile context from the CLKernelLibrary singleton.
template <bool IS_LOG>
void CLSoftmaxLayerGeneric<IS_LOG>::configure(const ICLTensor *input, ICLTensor *output, float beta, int32_t axis)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, output, beta, axis);
}
63 
64 template <bool IS_LOG>
65 void CLSoftmaxLayerGeneric<IS_LOG>::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta, int32_t axis)
66 {
67  // Perform validation step
68  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
70 
71  const size_t actual_axis = static_cast<size_t>(wrap_around(axis, static_cast<int32_t>(input->info()->num_dimensions())));
72 
73  _needs_permute = actual_axis != 0;
74  ICLTensor *tmp_output = output;
75  const ICLTensor *tmp_input = _needs_permute ? &_input_permuted : input;
76  if(_needs_permute)
77  {
78  _memory_group.manage(&_input_permuted);
79  _memory_group.manage(&_output_permuted);
80  _permute_input.configure(compile_context, input, &_input_permuted, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis));
81  tmp_output = &_output_permuted;
82  }
83 
84  // Create intermediate tensors
85  DataType tmp_data_type = is_data_type_quantized_asymmetric(tmp_input->info()->data_type()) ? DataType::S32 : tmp_input->info()->data_type();
86  TensorInfo tensor_info_tmp(tmp_input->info()->clone()->set_data_type(tmp_data_type));
87  _tmp.allocator()->init(tensor_info_tmp);
88  TensorShape max_sum_shape = tmp_input->info()->tensor_shape();
89  max_sum_shape.set(0, 1);
90  _max.allocator()->init(tmp_input->info()->clone()->set_tensor_shape(max_sum_shape));
91  _sum.allocator()->init(tmp_input->info()->clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type));
92 
93  // Set GPU target to kernels
94  _max_shift_exp_sum_kernel->set_target(CLScheduler::get().target());
95 
96  // Manage intermediate buffers
97  _memory_group.manage(&_tmp);
98  _memory_group.manage(&_max);
99  _memory_group.manage(&_sum);
100 
101  SoftmaxKernelInfo softmax_info;
102  softmax_info.beta = beta;
103  softmax_info.is_log = IS_LOG;
104  softmax_info.input_data_type = tmp_input->info()->data_type();
105 
106  // Configure kernels
107  _max_shift_exp_sum_kernel->configure(compile_context, tmp_input, &_max, &_tmp, &_sum, softmax_info);
108  _norm_kernel->configure(compile_context, &_tmp, &_sum, tmp_output, softmax_info);
109 
110  // Allocate intermediate buffers
111  _tmp.allocator()->allocate();
112  _max.allocator()->allocate();
113  _sum.allocator()->allocate();
114  if(_needs_permute)
115  {
116  _permute_output.configure(compile_context, &_output_permuted, output, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis));
117  _input_permuted.allocator()->allocate();
118  _output_permuted.allocator()->allocate();
119  }
120 }
121 
122 template <bool IS_LOG>
123 Status CLSoftmaxLayerGeneric<IS_LOG>::validate(const ITensorInfo *input, const ITensorInfo *output, float beta, int32_t axis)
124 {
126  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() > 4, "Only up to 4 dimensions are supported");
127  ARM_COMPUTE_UNUSED(beta);
128  ARM_COMPUTE_RETURN_ERROR_ON(axis < static_cast<int32_t>(-input->num_dimensions()) || static_cast<int32_t>(input->num_dimensions()) <= axis);
129 
130  const size_t actual_axis = static_cast<size_t>(wrap_around(axis, static_cast<int32_t>(input->num_dimensions())));
131  const bool needs_permute = actual_axis != 0;
132  if(needs_permute)
133  {
135  const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(*input, permutation_vector);
136  TensorInfo input_permuted(input->clone()->set_tensor_shape(permuted_shape));
137  ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(input, &input_permuted, permutation_vector));
138  TensorInfo output_permuted(output->clone()->set_tensor_shape(permuted_shape));
139  ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(&output_permuted, output, permutation_vector));
140  }
141 
142  // Create intermediate tensor info
143  DataType tmp_data_type = is_data_type_quantized_asymmetric(input->data_type()) ? DataType::S32 : input->data_type();
144  TensorInfo tensor_info_tmp(input->clone()->set_data_type(tmp_data_type).set_is_resizable(true));
145 
146  TensorShape max_sum_shape = input->tensor_shape();
147  max_sum_shape.set(0, 1);
148  TensorInfo tensor_info_max(input->clone()->set_tensor_shape(max_sum_shape).set_is_resizable(true));
149  TensorInfo tensor_info_sum(input->clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(QuantizationInfo()).set_is_resizable(true));
150 
151  SoftmaxKernelInfo softmax_info;
152  softmax_info.beta = beta;
153  softmax_info.is_log = IS_LOG;
154  softmax_info.input_data_type = input->data_type();
155 
156  ARM_COMPUTE_RETURN_ON_ERROR(CLLogits1DMaxShiftExpSumKernel::validate(input, &tensor_info_max, &tensor_info_tmp, &tensor_info_sum));
157  ARM_COMPUTE_RETURN_ON_ERROR(CLLogits1DNormKernel::validate(&tensor_info_tmp, &tensor_info_sum, output, softmax_info));
158 
159  return Status{};
160 }
161 
162 template <bool IS_LOG>
164 {
165  MemoryGroupResourceScope scope_mg(_memory_group);
166 
167  if(_needs_permute)
168  {
169  _permute_input.run();
170  }
171 
172  CLScheduler::get().enqueue(*_max_shift_exp_sum_kernel, false);
173  CLScheduler::get().enqueue(*_norm_kernel, !_needs_permute);
174 
175  if(_needs_permute)
176  {
177  _permute_output.run();
178  }
179 }
180 
// Explicit instantiations: standard softmax (IS_LOG = false) and
// log-softmax (IS_LOG = true), matching the aliases exposed in the header.
template class CLSoftmaxLayerGeneric<false>;
template class CLSoftmaxLayerGeneric<true>;
183 
184 } // namespace arm_compute
Interface for max, shifting, exponentiating and summing the logits.
void run() override
Run the kernels contained in the function.
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Shape of a tensor.
Definition: TensorShape.h:39
TensorShape compute_permutation_output_shape(const ITensorInfo &input, const PermutationVector &perm)
Calculate the permuted shape of an input given a permutation vector.
CLSoftmaxLayerGeneric(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
PermutationVector get_permutation_vector_from_softmax_axis(size_t axis)
Given a softmax axis, this function returns the permutation vector required to put the axis to the fr...
static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, const SoftmaxKernelInfo &info)
Static function to check if given info will lead to a valid configuration of CLLogits1DNormKernel.
static CLScheduler & get()
Access the scheduler singleton.
float beta
A scaling factor for the exponent with default value 1.0.
void configure(const ICLTensor *input, ICLTensor *output, float beta=1.0f, int32_t axis=0)
Set the input and output tensors.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Definition: CLTensor.cpp:61
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Status class.
Definition: Error.h:52
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
void run() override
Run the kernels contained in the function.
Definition: CLPermute.cpp:71
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Definition: MemoryGroup.h:79
Basic function to compute a SoftmaxLayer.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
T wrap_around(T x, T m)
Wrap-around a number within the range 0 <= x < m.
Definition: Helpers.h:231
Quantization information.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
DataType input_data_type
Input tensor data type.
bool is_log
Flag used to perform Log Softmax operation.
Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by...
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta=1.0f, int32_t axis=0)
Static function to check if given info will lead to a valid configuration of CLSoftmaxLayer.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
CLCompileContext class.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1190
Strides of an item in bytes.
Definition: Strides.h:37
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
Interface for OpenCL tensor.
Definition: ICLTensor.h:42
static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum)
Static function to check if given info will lead to a valid configuration of CLLogits1DMaxShiftExpSum...
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Store the tensor's metadata.
Definition: TensorInfo.h:45
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
Set the input and output tensors.
Definition: CLPermute.cpp:50
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration of CLPermute.
Definition: CLPermute.cpp:66
Descriptor used by the softmax kernels.
DataType
Available data types.
Definition: Types.h:77
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:79
~CLSoftmaxLayerGeneric()
Default destructor.