Compute Library
 21.08
ClSoftmax.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
31 #include "support/Cast.h"
32 
33 using namespace arm_compute::experimental;
34 
35 namespace arm_compute
36 {
37 namespace opencl
38 {
// Default constructor.
// Creates the two ClPermute functions and the two softmax kernels up front,
// default-initialises every intermediate tensor info, and constructs _aux_mem
// from InternalTensorIdx::COUNT (presumably one entry per internal tensor --
// confirm against the class declaration, which is not part of this listing).
39 ClSoftmax::ClSoftmax()
40  : _permute_input(std::make_unique<ClPermute>()),
41  _permute_output(std::make_unique<ClPermute>()),
42  _max_shift_exp_sum_kernel(std::make_unique<kernels::ClLogits1DMaxShiftExpSumKernel>()),
43  _norm_kernel(std::make_unique<kernels::ClLogits1DNormKernel>()),
44  _max_info(),
45  _sum_info(),
46  _tmp_info(),
47  _permuted_src_info(),
48  _permuted_dst_info(),
49  _aux_mem(InternalTensorIdx::COUNT)
50 {
51 }
52 
// Body of ClSoftmax::configure(compile_context, src, dst, info).
// NOTE(review): the signature sits on listing line 53, which is missing from
// this extract.
//
// Steps performed below:
//  1. Throw if validate(src, dst, info) reports an error.
//  2. Resolve the softmax axis and decide whether a permutation is required.
//  3. Derive the tmp / max / sum tensor infos from the kernel input info.
//  4. Configure the max-shift-exp-sum kernel and the normalisation kernel.
//  5. Configure the output permutation when one is required.
//  6. Record the size of every intermediate tensor in _aux_mem.
54 {
55  ARM_COMPUTE_ERROR_THROW_ON(validate(src, dst, info));
56 
// Wrap the (possibly negative) axis using the rank of src as the modulus.
57  const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions())));
58 
// A permutation is only configured when the resolved axis is not dimension 0.
59  _needs_permute = actual_axis != 0;
60 
// The kernels are configured on the permuted infos when a permutation is
// needed, and directly on src / dst otherwise.
61  const ITensorInfo &tmp_input_info = _needs_permute ? _permuted_src_info : src;
62  ITensorInfo &tmp_output_info = _needs_permute ? _permuted_dst_info : dst;
63 
64  if(_needs_permute)
65  {
66  const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
67  _permute_input->configure(compile_context, &src, &_permuted_src_info, perm_info);
68  }
69 
// Asymmetric-quantized inputs use S32 for the intermediate tensor; every other
// data type is kept as-is.
70  DataType tmp_data_type = is_data_type_quantized_asymmetric(tmp_input_info.data_type()) ? DataType::S32 : tmp_input_info.data_type();
71  _tmp_info = tmp_input_info.clone()->set_data_type(tmp_data_type);
72 
// NOTE(review): max_sum_shape is the unmodified input shape here, whereas
// validate() calls max_sum_shape.set(0, 1) before building its max / sum
// infos -- confirm that this difference is intended.
73  TensorShape max_sum_shape = tmp_input_info.tensor_shape();
74  _max_info = tmp_input_info.clone()->set_tensor_shape(max_sum_shape);
75  _sum_info = tmp_input_info.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type);
76 
77  // Set GPU target to kernels
78  _max_shift_exp_sum_kernel->set_target(CLScheduler::get().target());
79 
80  _max_shift_exp_sum_kernel->configure(compile_context, tmp_input_info, _max_info, _tmp_info, _sum_info, info);
81  _norm_kernel->configure(compile_context, _tmp_info, _sum_info, tmp_output_info, info);
82 
83  if(_needs_permute)
84  {
// Same permutation vector as for the input, applied from the permuted
// destination info back to dst.
85  const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
86  _permute_output->configure(compile_context, &_permuted_dst_info, &dst, perm_info);
87  }
88 
// Register all five intermediates as Temporary workspace entries, each sized
// by total_size() of its tensor info. The permuted entries are registered
// whether or not a permutation is needed.
89  _aux_mem[InternalTensorIdx::SUM] = MemoryInfo(offset_int_vec(InternalTensorIdx::SUM), MemoryLifetime::Temporary, _sum_info.total_size());
90  _aux_mem[InternalTensorIdx::TMP] = MemoryInfo(offset_int_vec(InternalTensorIdx::TMP), MemoryLifetime::Temporary, _tmp_info.total_size());
91  _aux_mem[InternalTensorIdx::MAX] = MemoryInfo(offset_int_vec(InternalTensorIdx::MAX), MemoryLifetime::Temporary, _max_info.total_size());
92 
93  _aux_mem[InternalTensorIdx::PERMUTED_SRC] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), MemoryLifetime::Temporary, _permuted_src_info.total_size());
94  _aux_mem[InternalTensorIdx::PERMUTED_DST] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_DST), MemoryLifetime::Temporary, _permuted_dst_info.total_size());
95 }
96 
// Body of ClSoftmax::validate(src, dst, info).
// NOTE(review): the signature sits on listing line 97, which is missing from
// this extract.
//
// Returns an error Status when src has more than 4 dimensions, when info.axis
// lies outside [-rank, rank), or when any ClPermute / kernel validate() call
// below reports an error. Returns a default-constructed Status otherwise.
98 {
99  ARM_COMPUTE_RETURN_ERROR_ON_MSG(src.num_dimensions() > 4, "Only up to 4 dimensions are supported");
100  ARM_COMPUTE_UNUSED(info.beta);
// The negation is applied to the unsigned num_dimensions() value before the
// cast to int32_t.
101  ARM_COMPUTE_RETURN_ERROR_ON(info.axis < static_cast<int32_t>(-src.num_dimensions()) || static_cast<int32_t>(src.num_dimensions()) <= info.axis);
102 
103  const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions())));
104  const bool needs_permute = actual_axis != 0;
105  if(needs_permute)
106  {
// NOTE(review): listing line 107 is missing from this extract;
// permutation_vector is presumably declared there -- confirm against the
// original file.
108  const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(src, permutation_vector);
109  TensorInfo input_permuted(src.clone()->set_tensor_shape(permuted_shape));
110  ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&src, &input_permuted, permutation_vector));
111  TensorInfo output_permuted(dst.clone()->set_tensor_shape(permuted_shape));
112  ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&output_permuted, &dst, permutation_vector));
113  }
114 
115  // Create intermediate tensor info
// NOTE(review): listing line 116 is missing from this extract; tmp_data_type
// is presumably declared there -- confirm against the original file.
117  TensorInfo tensor_info_tmp(src.clone()->set_data_type(tmp_data_type).set_is_resizable(true));
118 
// The max / sum infos use the shape of src with dimension 0 set to 1.
119  TensorShape max_sum_shape = src.tensor_shape();
120  max_sum_shape.set(0, 1);
121  TensorInfo tensor_info_max(src.clone()->set_tensor_shape(max_sum_shape).set_is_resizable(true));
122  TensorInfo tensor_info_sum(src.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(QuantizationInfo()).set_is_resizable(true));
123 
// The kernel checks are run against src / dst directly; the permuted infos
// built above are only passed to ClPermute::validate().
124  ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DMaxShiftExpSumKernel::validate(src, tensor_info_max, tensor_info_tmp, tensor_info_sum));
125  ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DNormKernel::validate(tensor_info_tmp, tensor_info_sum, dst, info));
126 
127  return Status{};
128 }
129 
// Body of ClSoftmax::run(tensors).
// NOTE(review): the signature sits on listing line 130, which is missing from
// this extract. Listing lines 144-145, 159, 174 and 176 are missing as well,
// so some statements of this function are not visible below.
//
// Visible flow: fetch src / dst from the pack, build handlers for the five
// intermediates, run the input permutation when needed, enqueue the
// max-shift-exp-sum kernel followed by the normalisation kernel, then run the
// output permutation when needed.
131 {
132  auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
133  auto dst = tensors.get_tensor(TensorType::ACL_DST);
134 
// Handlers for the intermediates, keyed by the same offset_int_vec() ids that
// configure() stores in _aux_mem.
135  CLAuxTensorHandler sum(offset_int_vec(InternalTensorIdx::SUM), _sum_info, tensors, false);
136  CLAuxTensorHandler tmp(offset_int_vec(InternalTensorIdx::TMP), _tmp_info, tensors, false);
137  CLAuxTensorHandler max(offset_int_vec(InternalTensorIdx::MAX), _max_info, tensors, false);
138 
139  CLAuxTensorHandler permuted_src(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), _permuted_src_info, tensors, false);
140  CLAuxTensorHandler permuted_dst(offset_int_vec(InternalTensorIdx::PERMUTED_DST), _permuted_dst_info, tensors, false);
141 
142  if(_needs_permute)
143  {
// NOTE(review): listing lines 144-145 are missing; `pack` is presumably
// declared and given its ACL_SRC entry there -- confirm against the original.
146  pack.add_tensor(TensorType::ACL_DST, permuted_src.get());
147  _permute_input.get()->run(pack);
148  }
149 
// One pack per kernel: sum_pack feeds the max-shift-exp-sum kernel and
// norm_pack feeds the normalisation kernel.
150  ITensorPack sum_pack;
151  ITensorPack norm_pack;
152  if(_needs_permute)
153  {
154  sum_pack.add_const_tensor(TensorType::ACL_SRC, permuted_src.get());
155  norm_pack.add_tensor(TensorType::ACL_DST, permuted_dst.get());
156  }
157  else
158  {
// NOTE(review): listing line 159 is missing; it presumably adds src to
// sum_pack as ACL_SRC, mirroring the branch above -- confirm.
160  norm_pack.add_tensor(TensorType::ACL_DST, dst);
161  }
162  sum_pack.add_tensor(TensorType::ACL_DST, tmp.get());
163  sum_pack.add_tensor(TensorType::ACL_INT_0, max.get());
164  sum_pack.add_tensor(TensorType::ACL_INT_1, sum.get());
165 
// The normalisation kernel reads tmp (the first kernel's ACL_DST) together
// with sum.
166  norm_pack.add_const_tensor(TensorType::ACL_SRC, tmp.get());
167  norm_pack.add_tensor(TensorType::ACL_INT_0, sum.get());
168 
// Both kernels are enqueued with the flush argument set to false.
169  CLScheduler::get().enqueue_op(*_max_shift_exp_sum_kernel.get(), sum_pack, false);
170  CLScheduler::get().enqueue_op(*_norm_kernel.get(), norm_pack, false);
171 
172  if(_needs_permute)
173  {
// NOTE(review): listing lines 174 and 176 are missing; `pack` is presumably
// declared on 174 and given its ACL_DST entry on 176 -- confirm.
175  pack.add_const_tensor(TensorType::ACL_SRC, permuted_dst.get());
177  _permute_output.get()->run(pack);
178  }
179 }
180 
// Body of ClSoftmax::workspace().
// NOTE(review): the signature sits on listing line 181, which is missing from
// this extract.
// Returns the memory requirements held in _aux_mem, whose entries are
// assigned in configure().
182 {
183  return _aux_mem;
184 }
185 } // namespace opencl
186 } // namespace arm_compute
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Shape of a tensor.
Definition: TensorShape.h:39
TensorShape compute_permutation_output_shape(const ITensorInfo &input, const PermutationVector &perm)
Calculate the permuted shape of an input given a permutation vector.
static Status validate(const ITensorInfo &src, const ITensorInfo &sum, const ITensorInfo &dst, const SoftmaxKernelInfo &info)
Static function to check if given info will lead to a valid configuration.
PermutationVector get_permutation_vector_from_softmax_axis(size_t axis)
Given a softmax axis, this function returns the permutation vector required to put the axis to the front.
void add_const_tensor(int id, const ITensor *tensor)
Add const tensor to the pack.
Definition: ITensorPack.cpp:49
static CLScheduler & get()
Access the scheduler singleton.
void configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info)
Configure the operator.
Definition: ClSoftmax.cpp:53
static Status validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info)
Static function to check if given info will lead to a valid configuration.
Definition: ClSoftmax.cpp:97
float beta
A scaling factor for the exponent with default value 1.0.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
void run(ITensorPack &tensors) override
Run the kernels contained in the function.
Definition: ClSoftmax.cpp:130
Store the tensor's metadata.
Definition: ITensorInfo.h:40
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455
Interface for max, shifting, exponentiating and summing the logits.
Status class.
Definition: Error.h:52
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296
Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits.
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2021 Arm Limited.
std::vector< MemoryInfo > MemoryRequirements
Definition: Types.h:113
static Status validate(const ITensorInfo &src, const ITensorInfo &max, const ITensorInfo &dst, const ITensorInfo &sum)
Static function to check if given info will lead to a valid configuration.
1 channel, 1 S32 per channel
const ITensor * get_const_tensor(int id) const
Get constant tensor of a given id.
Definition: ITensorPack.cpp:54
T wrap_around(T x, T m)
Wrap-around a number within the range 0 <= x < m.
Definition: Helpers.h:231
Quantization information.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
size_t total_size() const override
Returns the total size of the tensor in bytes.
Definition: TensorInfo.h:250
void enqueue_op(ICLKernel &kernel, ITensorPack &tensors, bool flush=true)
Schedule the execution of the passed kernel if possible.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
experimental::MemoryRequirements workspace() const override
Return the memory requirements required by the workspace.
Definition: ClSoftmax.cpp:181
CLCompileContext class.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1003
Strides of an item in bytes.
Definition: Strides.h:37
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
ITensor * get_tensor(int id)
Get tensor of a given id from the pack.
Definition: ITensorPack.cpp:64
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244
Tensor packing service.
Definition: ITensorPack.h:39
Store the tensor's metadata.
Definition: TensorInfo.h:43
int offset_int_vec(int offset)
Definition: MemoryHelpers.h:38
int32_t axis
The dimension in which to apply softmax.
Descriptor used by the softmax kernels.
Basic function to run kernels::ClPermuteKernel.
Definition: ClPermute.h:35
DataType
Available data types.
Definition: Types.h:77
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:79
void add_tensor(int id, ITensor *tensor)
Add tensor to the pack.
Definition: ITensorPack.cpp:39
static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration.
Definition: ClPermute.cpp:40