Compute Library
 22.05
ClSoftmaxKernel.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CL_SOFTMAX_KERNEL_H
25 #define ARM_COMPUTE_CL_SOFTMAX_KERNEL_H
26 
#include "arm_compute/core/Error.h"
#include "arm_compute/core/KernelDescriptors.h"

#include "src/core/common/Macros.h"
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/IClKernel.h"
32 
33 namespace arm_compute
34 {
35 namespace opencl
36 {
37 namespace kernels
38 {
39 /** Interface for max, shifting, exponentiating and summing the logits */
41 {
42  /**< Grid size (obtained through auto-tuning) */
43  static const unsigned int _grid_size;
44  /**< Vector size in the serial case (obtained through auto-tuning) */
45  static const unsigned int _serial_vector_size;
46  /**< Vector size in the parallel case (obtained through auto-tuning, enables the best memory access pattern for Bifrost) .*/
47  static const unsigned int _parallel_vector_size;
48 
49 public:
50  /** Info for whether a parallel reduction will be run and the vector size of the execution. */
51  using ParallelReductionInfo = std::tuple<bool, unsigned int>;
52 
55  /** Configure the kernel using the given information about tensors
56  *
57  * @param[in] compile_context The compile context to be used.
58  * @param[in] src Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
59  * @param[in,out] max Max values tensor. Data types supported: same as @p src
60  * @param[out] dst Destination tensor. Data types supported: same as @p src
61  * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p src
62  * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
63  */
64  void configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &max, ITensorInfo &dst, ITensorInfo &sum, const SoftmaxKernelInfo &info);
65  /** Static function to check if given info will lead to a valid configuration
66  *
67  * Similar to @ref ClLogits1DMaxShiftExpSumKernel::configure()
68  *
69  * @return a status
70  */
71  static Status validate(const ITensorInfo &src, const ITensorInfo &max, const ITensorInfo &dst, const ITensorInfo &sum);
72  /** Checks if the given size is eligible for parallel reduction
73  *
74  * @note Serial reduction is launched for width < (_grid_size * _serial_vector_size).
75  * @note Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4.
76  *
77  * @param[in] size Size to check
78  *
79  * @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run,
80  * while the second element is the vector size of the execution.
81  */
82  static ParallelReductionInfo is_parallel_reduction(size_t size);
83 
84  // Inherited methods overridden:
85  void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
86 };
87 
88 /** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
90 {
91 public:
94 
95  /** Set the input and output tensors.
96  *
97  * @param[in] compile_context The compile context to be used.
98  * @param[in] src Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
99  * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
100  * @param[out] dst Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input
101  * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
102  */
103  void configure(const CLCompileContext &compile_context, const ITensorInfo &src, const ITensorInfo &sum, ITensorInfo &dst, const SoftmaxKernelInfo &info);
104  /** Static function to check if given info will lead to a valid configuration
105  *
106  * Similar to @ref ClLogits1DNormKernel::configure()
107  *
108  * @return a status
109  */
110  static Status validate(const ITensorInfo &src, const ITensorInfo &sum, const ITensorInfo &dst, const SoftmaxKernelInfo &info);
111 
112  // Inherited methods overridden:
113  void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
114 };
115 } // namespace kernels
116 } // namespace opencl
117 } // namespace arm_compute
118 #endif /* ARM_COMPUTE_CL_SOFTMAX_KERNEL_H */
void configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &max, ITensorInfo &dst, ITensorInfo &sum, const SoftmaxKernelInfo &info)
Configure the kernel using the given information about tensors.
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28
void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClLogits1DMaxShiftExpSumKernel)
static ParallelReductionInfo is_parallel_reduction(size_t size)
Checks if the given size is eligible for parallel reduction.
Store the tensor's metadata.
Definition: ITensorInfo.h:40
Interface for max, shifting, exponentiating and summing the logits.
Status class.
Definition: Error.h:52
std::tuple< bool, unsigned int > ParallelReductionInfo
Info for whether a parallel reduction will be run and the vector size of the execution.
Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by...
Common interface for all the OpenCL kernels.
Definition: ICLKernel.h:81
SimpleTensor< float > src
Definition: DFT.cpp:155
Copyright (c) 2017-2022 Arm Limited.
static Status validate(const ITensorInfo &src, const ITensorInfo &max, const ITensorInfo &dst, const ITensorInfo &sum)
Static function to check if given info will lead to a valid configuration.
CLCompileContext class.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Tensor packing service.
Definition: ITensorPack.h:39
Descriptor used by the softmax kernels.
Describe a multidimensional execution window.
Definition: Window.h:39