Compute Library
 21.02
NESoftmaxLayer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
30 
31 namespace arm_compute
32 {
// Pimpl state for NESoftmaxLayerGeneric: the caller-supplied src/dst tensors,
// the intermediate tensors used by the softmax decomposition, and the backing
// cpu operator that performs the actual computation.
33 template <bool IS_LOG>
34 struct NESoftmaxLayerGeneric<IS_LOG>::Impl
35 {
36  const ITensor *src{ nullptr }; // non-owning: input tensor supplied to configure()
37  ITensor *dst{ nullptr }; // non-owning: output tensor supplied to configure()
38  Tensor max{ nullptr }; // per-row maxima; dimension 0 collapsed to 1 (see max_sum_shape in configure())
39  Tensor tmp{ nullptr }; // scratch tensor; promoted to F32 when the input is quantized asymmetric
40  Tensor input_permuted{ nullptr }; // input brought to axis-0 layout when actual_axis > 0
41  Tensor output_permuted{ nullptr }; // result in permuted layout, re-permuted back into dst
42  std::unique_ptr<cpu::CpuSoftmaxGeneric<IS_LOG>> op{ nullptr }; // cpu operator run via ITensorPack in run()
43 };
44 
// Constructor: hands the (optional) memory manager to the memory group and
// creates the empty Impl state. All kernel/operator setup happens in configure().
45 template <bool IS_LOG>
46 NESoftmaxLayerGeneric<IS_LOG>::NESoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
47  : _memory_group(std::move(memory_manager)), _impl(std::make_unique<Impl>())
48 {
49 }
50 
51 template <bool IS_LOG>
53 template <bool IS_LOG>
55 template <bool IS_LOG>
57 
// Configure the (log-)softmax function.
// input/output: caller-owned tensors (must be non-null); beta: scaling factor
// applied by the softmax kernel; axis: reduction axis, wrapped into
// [0, num_dimensions) so negative values count from the back.
// When the (wrapped) axis is not 0, the input is permuted so the softmax axis
// becomes dimension 0, softmax is computed there, and the result is permuted
// back into the requested output layout.
58 template <bool IS_LOG>
59 void NESoftmaxLayerGeneric<IS_LOG>::configure(ITensor *input, ITensor *output, float beta, int32_t axis)
60 {
61  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
62 
63  _impl->src = input;
64  _impl->dst = output;
65  _impl->op = std::make_unique<cpu::CpuSoftmaxGeneric<IS_LOG>>();
66  _impl->op->configure(input->info(), output->info(), beta, axis);
67 
// wrap_around maps negative axes (e.g. -1) onto the valid dimension range.
68  const unsigned int actual_axis = static_cast<unsigned int>(wrap_around(axis, static_cast<int32_t>(input->info()->num_dimensions())));
69  const bool needs_permute = actual_axis > 0;
70  if(needs_permute)
71  {
72  // Add to the memory manager _input_permuted
73  auto permute_input = std::make_unique<cpu::CpuPermute>();
74  _memory_group.manage(&_impl->input_permuted);
75  permute_input->configure(input->info(), _impl->input_permuted.info(), softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis));
76  }
77 
78  // We want to deal with a 2D input. Either it is the permuted version of the original input (4D case)
79  // or it is the original input case (2D case)
80  ITensor *tmp_input = (needs_permute ? &_impl->input_permuted : input);
81 
82  // Create intermediate tensors shapes
83  const TensorInfo input_info = tmp_input->info()->clone()->reset_padding().set_is_resizable(true);
// Quantized asymmetric inputs need an F32 scratch buffer for the intermediate
// exponentials; otherwise the scratch buffer matches the input data type.
84  DataType tmp_data_type = is_data_type_quantized_asymmetric(tmp_input->info()->data_type()) ? DataType::F32 : tmp_input->info()->data_type();
85  TensorInfo tensor_info_tmp(input_info.clone()->set_data_type(tmp_data_type));
86 
87  // Init intermediate tensors
// The max/sum reduction collapses dimension 0 to a single element per row.
88  TensorShape max_sum_shape = tmp_input->info()->tensor_shape();
89  max_sum_shape.set(0, 1);
90  _impl->max.allocator()->init(input_info.clone()->set_tensor_shape(max_sum_shape));
91  _impl->tmp.allocator()->init(tensor_info_tmp);
92 
93  // Manage intermediate buffers
94  _memory_group.manage(&_impl->max);
95  _memory_group.manage(&_impl->tmp);
96 
97  // Configure kernels
98  auto max_kernel = std::make_unique<cpu::kernels::CpuLogits1DMaxKernel>();
99  auto softmax_kernel = std::make_unique<cpu::kernels::CpuLogits1DSoftmaxKernel<IS_LOG>>();
100  max_kernel->configure(tmp_input->info(), _impl->max.info());
101 
102  if(needs_permute)
103  {
104  auto permute_output = std::make_unique<cpu::CpuPermute>();
105  // Add to the memory manager _output_permuted
106  _memory_group.manage(&_impl->output_permuted);
107 
108  // The normalization kernel stores the result in a permuted output tensor
109  softmax_kernel->configure(tmp_input->info(), _impl->max.info(), _impl->output_permuted.info(), beta, _impl->tmp.info());
// input_permuted can be released once the softmax kernel is configured:
// it is only consumed between the input permute and the softmax itself.
110  _impl->input_permuted.allocator()->allocate();
111 
112  // Re-permute the permuted output into the requested (4D) output
113  permute_output->configure(_impl->output_permuted.info(), output->info(), softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis));
114 
115  // Allocate the intermediate permuted tensors
116  _impl->output_permuted.allocator()->allocate();
117  }
118  else
119  {
// No permutation needed: the softmax kernel writes straight into the user output.
120  softmax_kernel->configure(tmp_input->info(), _impl->max.info(), output->info(), beta, _impl->tmp.info());
121  }
122 
123  // Allocate intermediate buffers
124  _impl->max.allocator()->allocate();
125  _impl->tmp.allocator()->allocate();
126 }
127 
// Static validation entry point: checks whether the given tensor infos lead to
// a valid (log-)softmax configuration.
// NOTE(review): the documentation extraction dropped lines 131-132 here — the
// doc numbering jumps from 130 to 133. Presumably the missing lines delegate
// to cpu::CpuSoftmaxGeneric<IS_LOG>::validate via ARM_COMPUTE_RETURN_ON_ERROR
// (that macro is cross-referenced for this file); verify against the original
// NESoftmaxLayer.cpp before relying on this body.
128 template <bool IS_LOG>
129 Status NESoftmaxLayerGeneric<IS_LOG>::validate(const ITensorInfo *input, const ITensorInfo *output, float beta, int32_t axis)
130 {
133  return Status{};
134 }
135 
// Run the configured softmax: acquires the managed intermediate buffers for the
// duration of the call, packs src/dst and the intermediate tensors into an
// ITensorPack, and dispatches to the cpu operator.
// NOTE(review): the documentation extraction dropped line 137 — the function
// signature. The cross-reference section shows it is `void run() override`;
// confirm against the original NESoftmaxLayer.cpp.
136 template <bool IS_LOG>
138 {
139  MemoryGroupResourceScope scope_mg(_memory_group); // RAII acquire/release of memory-group buffers
140  ITensorPack pack;
141  pack.add_tensor(TensorType::ACL_SRC, _impl->src);
142  pack.add_tensor(TensorType::ACL_DST, _impl->dst);
143  pack.add_tensor(TensorType::ACL_INT_0, &_impl->tmp);
144  pack.add_tensor(TensorType::ACL_INT_1, &_impl->max);
145  pack.add_tensor(TensorType::ACL_INT_2, &_impl->input_permuted);
146  pack.add_tensor(TensorType::ACL_INT_3, &_impl->output_permuted);
147  _impl->op->run(pack);
148 }
149 
// Explicit instantiations: plain softmax (IS_LOG = false) and log-softmax (IS_LOG = true).
150 template class NESoftmaxLayerGeneric<false>;
151 template class NESoftmaxLayerGeneric<true>;
152 
153 } // namespace arm_compute
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)
Shape of a tensor.
Definition: TensorShape.h:39
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
Definition: TensorInfo.cpp:316
~NESoftmaxLayerGeneric()
Default destructor.
PermutationVector get_permutation_vector_from_softmax_axis(size_t axis)
Given a softmax axis, this function returns the permutation vector required to put the axis to the front.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204
virtual DataType data_type() const =0
Data type used for each element of the tensor.
1 channel, 1 F32 per channel
Store the tensor's metadata.
Definition: ITensorInfo.h:40
Status class.
Definition: Error.h:52
Interface for Neon tensor.
Definition: ITensor.h:36
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2021 Arm Limited.
Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Definition: MemoryGroup.h:79
T wrap_around(T x, T m)
Wrap-around a number within the range 0 <= x < m.
Definition: Helpers.h:231
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
void configure(ITensor *input, ITensor *output, float beta=1.0f, int32_t axis=0)
Set the input and output tensors.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Definition: Utils.h:1190
NESoftmaxLayerGeneric & operator=(const NESoftmaxLayerGeneric &)=delete
Prevent instances of this class from being copied (As this class contains pointers) ...
Memory group resources scope handling class.
Definition: IMemoryGroup.h:82
static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta=1.0f, int32_t axis=0)
Static function to check if given info will lead to a valid configuration of NESoftmaxLayer.
Tensor packing service.
Definition: ITensorPack.h:37
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161
Store the tensor's metadata.
Definition: TensorInfo.h:45
DataType
Available data types.
Definition: Types.h:77
NESoftmaxLayerGeneric(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:79
Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer.
Definition: CpuSoftmax.h:57
void add_tensor(int id, ITensor *tensor)
Add tensor to the pack.
Definition: ITensorPack.cpp:30