24 #ifndef ARM_COMPUTE_CL_SOFTMAX_KERNEL_H 25 #define ARM_COMPUTE_CL_SOFTMAX_KERNEL_H 43 static const unsigned int _grid_size;
45 static const unsigned int _serial_vector_size;
47 static const unsigned int _parallel_vector_size;
void configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &max, ITensorInfo &dst, ITensorInfo &sum, const SoftmaxKernelInfo &info)
Configure the kernel using the given information about tensors.
const Window & window() const
The maximum window the kernel can be executed on.
void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClLogits1DMaxShiftExpSumKernel)
static ParallelReductionInfo is_parallel_reduction(size_t size)
Checks if the given size is eligible for parallel reduction.
Store the tensor's metadata.
Interface for max, shifting, exponentiating and summing the logits.
std::tuple< bool, unsigned int > ParallelReductionInfo
Info for whether a parallel reduction will be run and the vector size of the execution.
Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits.
Common interface for all the OpenCL kernels.
SimpleTensor< float > src
Copyright (c) 2017-2022 Arm Limited.
static Status validate(const ITensorInfo &src, const ITensorInfo &max, const ITensorInfo &dst, const ITensorInfo &sum)
Static function to check if given info will lead to a valid configuration.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
Descriptor used by the softmax kernels.
Describe a multidimensional execution window.
ClLogits1DMaxShiftExpSumKernel()