22.05
|
Common interface for all kernels implemented in C++. More...
#include <ICPPKernel.h>
Public Member Functions | |
virtual | ~ICPPKernel ()=default |
Default destructor. More... | |
virtual void | run (const Window &window, const ThreadInfo &info) |
Execute the kernel on the passed window. More... | |
virtual void | run_nd (const Window &window, const ThreadInfo &info, const Window &thread_locator) |
Legacy compatibility layer for implementations which do not support thread_locator. In these cases we simply narrow the interface down to the legacy version. More... | |
virtual void | run_op (ITensorPack &tensors, const Window &window, const ThreadInfo &info) |
Execute the kernel on the passed window. More... | |
virtual size_t | get_mws (const CPUInfo &platform, size_t thread_count) const |
Return minimum workload size of the relevant kernel. More... | |
virtual const char * | name () const =0 |
Name of the kernel. More... | |
![]() | |
IKernel () | |
Constructor. More... | |
virtual | ~IKernel ()=default |
Destructor. More... | |
virtual bool | is_parallelisable () const |
Indicates whether or not the kernel is parallelisable. More... | |
virtual BorderSize | border_size () const |
The size of the border for that kernel. More... | |
const Window & | window () const |
The maximum window the kernel can be executed on. More... | |
bool | is_window_configured () const |
Function to check if the embedded window of this kernel has been configured. More... | |
Static Public Attributes | |
static constexpr size_t | default_mws = 1 |
Common interface for all kernels implemented in C++.
Definition at line 38 of file ICPPKernel.h.
|
virtual default |
Default destructor.
|
inline virtual |
Return minimum workload size of the relevant kernel.
[in] | platform | The CPU platform used to create the context. |
[in] | thread_count | Number of threads in the execution. |
Reimplemented in CpuGemmAssemblyWrapperKernel< TypeInput, TypeOutput >, CpuDepthwiseConv2dAssemblyWrapperKernel, CpuIm2ColKernel, CpuAddKernel, NEPadLayerKernel, CpuActivationKernel, and CpuReshapeKernel.
Definition at line 100 of file ICPPKernel.h.
References ARM_COMPUTE_UNUSED, ICPPKernel::default_mws, and ICPPKernel::name().
Referenced by IScheduler::run_tagged_workloads().
|
pure virtual |
Name of the kernel.
Implemented in CpuWinogradConv2dTransformWeightsKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuWinogradConv2dTransformOutputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuWinogradConv2dTransformInputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuComplexMulKernel, CpuGemmLowpMatrixBReductionKernel, CpuLogits1DSoftmaxKernel< IS_LOG >, CpuGemmLowpOffsetContributionOutputStageKernel, CpuIm2ColKernel, CpuGemmTranspose1xWKernel, CpuWeightsReshapeKernel, CpuMaxUnpoolingLayerKernel, CpuAddKernel, CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel, CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel, CpuGemmMatrixMultiplyKernel, CpuMulKernel, CpuCol2ImKernel, CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel, CpuGemmMatrixAdditionKernel, CpuDepthwiseConv2dAssemblyWrapperKernel, CpuGemmLowpQuantizeDownInt32ScaleKernel, CpuCastKernel, CpuDepthwiseConv2dNativeKernel, CpuDirectConv3dKernel, CpuGemmLowpOffsetContributionKernel, CpuGemmInterleave4x4Kernel, CpuActivationKernel, CpuScaleKernel, CpuSubKernel, CpuGemmLowpMatrixMultiplyKernel, CpuPool3dKernel, CpuDirectConv2dKernel, CpuDirectConv2dOutputStageKernel, CpuGemmLowpMatrixAReductionKernel, CpuGemmAssemblyWrapperKernel< TypeInput, TypeOutput >, CpuConcatenateDepthKernel, CpuFloorKernel, CpuElementwiseUnaryKernel, CpuConvertFullyConnectedWeightsKernel, CpuPool2dKernel, NELogicalKernel, CpuQuantizeKernel, CpuConcatenateHeightKernel, CpuConcatenateWidthKernel, CpuConcatenateBatchKernel, CpuPermuteKernel, CpuLogits1DMaxKernel, CpuCopyKernel, CpuReshapeKernel, CpuConvertQuantizedSignednessKernel, CpuDequantizeKernel, CpuTransposeKernel, NEGatherKernel, CpuPool2dAssemblyWrapperKernel, NECol2ImKernel, CpuElementwiseKernel< Derived >, CpuElementwiseKernel< CpuComparisonKernel >, CpuElementwiseKernel< CpuArithmeticKernel >, CpuFillKernel, NETileKernel, NESelectKernel, NEReductionOperationKernel, NEFFTRadixStageKernel, NERangeKernel, 
CPPNonMaximumSuppressionKernel, NEBitwiseAndKernel, NEBitwiseNotKernel, NEBitwiseOrKernel, NEBitwiseXorKernel, NEMeanStdDevNormalizationKernel, NEStridedSliceKernel, CPPPermuteKernel, NECropKernel, NEFFTScaleKernel, NEFillBorderKernel, CPPUpsampleKernel, NEBatchNormalizationLayerKernel, NEFFTDigitReverseKernel, NESpaceToBatchLayerKernel, NEStackLayerKernel, CPPBoxWithNonMaximaSuppressionLimitKernel, NEChannelShuffleLayerKernel, NEDepthToSpaceLayerKernel, NEFuseBatchNormalizationKernel, NEInstanceNormalizationLayerKernel, NENormalizationLayerKernel, NEQLSTMLayerNormalizationKernel, NEReorgLayerKernel, NEROIAlignLayerKernel, NESpaceToDepthLayerKernel, CPPTopKVKernel, NEBatchToSpaceLayerKernel, NEBoundingBoxTransformKernel, NEL2NormalizeLayerKernel, NEPadLayerKernel, NEPriorBoxLayerKernel, NEReverseKernel, NEComputeAllAnchorsKernel, and NEROIPoolingLayerKernel.
Referenced by ICPPKernel::get_mws(), and SchedulerClock< output_timestamps >::id().
|
inline virtual |
Execute the kernel on the passed window.
[in] | window | Region on which to execute the kernel. (Must be a region of the window returned by window()) |
[in] | info | Info about executing thread and CPU. |
Reimplemented in NESpaceToBatchLayerKernel, NEFuseBatchNormalizationKernel, NEBatchNormalizationLayerKernel, NECol2ImKernel, NECropKernel, NEBatchToSpaceLayerKernel, NEROIAlignLayerKernel, CPPNonMaximumSuppressionKernel, NEStackLayerKernel, NEBoundingBoxTransformKernel, NEFFTRadixStageKernel, NEReductionOperationKernel, NESelectKernel, NEFillBorderKernel, NEGatherKernel, NEL2NormalizeLayerKernel, NEPadLayerKernel, NEMeanStdDevNormalizationKernel, NENormalizationLayerKernel, NERangeKernel, NEFFTDigitReverseKernel, CPPPermuteKernel, CPPTopKVKernel, NEReorgLayerKernel, NEFFTScaleKernel, NEComputeAllAnchorsKernel, NEInstanceNormalizationLayerKernel, NEQLSTMLayerNormalizationKernel, CpuGemmAssemblyWrapperKernel< TypeInput, TypeOutput >, NEPriorBoxLayerKernel, NEChannelShuffleLayerKernel, NEDepthToSpaceLayerKernel, NEReverseKernel, NESpaceToDepthLayerKernel, NETileKernel, CPPBoxWithNonMaximaSuppressionLimitKernel, NEROIPoolingLayerKernel, NEBitwiseAndKernel, NEBitwiseOrKernel, NEBitwiseXorKernel, CPPUpsampleKernel, and NEBitwiseNotKernel.
Definition at line 57 of file ICPPKernel.h.
References ARM_COMPUTE_ERROR, and ARM_COMPUTE_UNUSED.
Referenced by IScheduler::num_threads_hint(), ICPPKernel::run_nd(), and SingleThreadScheduler::schedule().
|
inline virtual |
Legacy compatibility layer for implementations which do not support thread_locator. In these cases we simply narrow the interface down to the legacy version.
[in] | window | Region on which to execute the kernel. (Must be a region of the window returned by window()) |
[in] | info | Info about executing thread and CPU. |
[in] | thread_locator | Specifies "where" the current thread is in the multi-dimensional space |
Reimplemented in CpuGemmAssemblyWrapperKernel< TypeInput, TypeOutput >.
Definition at line 70 of file ICPPKernel.h.
References ARM_COMPUTE_UNUSED, and ICPPKernel::run().
Referenced by IScheduler::num_threads_hint().
|
inline virtual |
Execute the kernel on the passed window.
[in] | tensors | A vector containing the tensors to operate on. |
[in] | window | Region on which to execute the kernel. (Must be a region of the window returned by window()) |
[in] | info | Info about executing thread and CPU. |
Reimplemented in CpuWinogradConv2dTransformWeightsKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuWinogradConv2dTransformOutputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuWinogradConv2dTransformInputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuComplexMulKernel, CpuGemmLowpMatrixBReductionKernel, CpuLogits1DSoftmaxKernel< IS_LOG >, CpuGemmLowpOffsetContributionOutputStageKernel, NEStridedSliceKernel, CpuIm2ColKernel, CpuGemmTranspose1xWKernel, CpuWeightsReshapeKernel, NEFillBorderKernel, CpuAddKernel, CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel, CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel, CpuGemmMatrixMultiplyKernel, CpuMulKernel, CpuPool2dAssemblyWrapperKernel, CpuCol2ImKernel, CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel, CpuGemmMatrixAdditionKernel, CpuDepthwiseConv2dAssemblyWrapperKernel, CpuGemmLowpQuantizeDownInt32ScaleKernel, CpuCastKernel, CpuDepthwiseConv2dNativeKernel, CpuDirectConv3dKernel, CpuGemmLowpOffsetContributionKernel, CpuGemmInterleave4x4Kernel, CpuActivationKernel, CpuScaleKernel, CpuSubKernel, CpuMaxUnpoolingLayerKernel, CpuGemmLowpMatrixMultiplyKernel, CpuPool3dKernel, CpuDirectConv2dKernel, CpuDirectConv2dOutputStageKernel, CpuGemmLowpMatrixAReductionKernel, CpuConcatenateDepthKernel, CpuFloorKernel, CpuElementwiseUnaryKernel, CpuConvertFullyConnectedWeightsKernel, CpuPool2dKernel, NELogicalKernel, CpuQuantizeKernel, CpuConcatenateHeightKernel, CpuConcatenateWidthKernel, CpuConcatenateBatchKernel, CpuPermuteKernel, CpuLogits1DMaxKernel, CpuCopyKernel, CpuReshapeKernel, CpuConvertQuantizedSignednessKernel, CpuDequantizeKernel, CpuTransposeKernel, CpuElementwiseKernel< Derived >, CpuElementwiseKernel< CpuComparisonKernel >, CpuElementwiseKernel< CpuArithmeticKernel >, and CpuFillKernel.
Definition at line 88 of file ICPPKernel.h.
References ARM_COMPUTE_UNUSED.
Referenced by CpuWinogradConv2dTransformInputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >::name(), CpuWinogradConv2dTransformOutputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >::name(), CpuWinogradConv2dTransformWeightsKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >::name(), IScheduler::num_threads_hint(), SingleThreadScheduler::schedule_op(), and OMPScheduler::schedule_op().
|
static |
Definition at line 41 of file ICPPKernel.h.
Referenced by CpuReshapeKernel::get_mws(), CpuActivationKernel::get_mws(), NEPadLayerKernel::get_mws(), CpuAddKernel::get_mws(), ICPPKernel::get_mws(), CpuIm2ColKernel::get_mws(), CpuDepthwiseConv2dAssemblyWrapperKernel::get_mws(), CpuGemmAssemblyWrapperKernel< TypeInput, TypeOutput >::get_mws(), and CpuPool2dAssemblyWrapperKernel::is_configured().