21.08
|
Common interface for all kernels implemented in C++. More...
#include <ICPPKernel.h>
Public Member Functions | |
virtual | ~ICPPKernel ()=default |
Default destructor. More... | |
virtual void | run (const Window &window, const ThreadInfo &info) |
Execute the kernel on the passed window. More... | |
virtual void | run_nd (const Window &window, const ThreadInfo &info, const Window &thread_locator) |
Legacy compatibility layer for implementations which do not support thread_locator. In these cases we simply narrow the interface down to the legacy version. More... | |
virtual void | run_op (ITensorPack &tensors, const Window &window, const ThreadInfo &info) |
Execute the kernel on the passed window. More... | |
virtual const char * | name () const =0 |
Name of the kernel. More... | |
Public Member Functions inherited from IKernel | |
IKernel () | |
Constructor. More... | |
virtual | ~IKernel ()=default |
Destructor. More... | |
virtual bool | is_parallelisable () const |
Indicates whether or not the kernel is parallelisable. More... | |
virtual BorderSize | border_size () const |
The size of the border for that kernel. More... | |
const Window & | window () const |
The maximum window the kernel can be executed on. More... | |
bool | is_window_configured () const |
Function to check if the embedded window of this kernel has been configured. More... | |
Common interface for all kernels implemented in C++.
Definition at line 38 of file ICPPKernel.h.
|
virtual default |
Default destructor.
|
pure virtual |
Name of the kernel.
Implemented in CpuWinogradConv2dTransformWeightsKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuWinogradConv2dTransformOutputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuWinogradConv2dTransformInputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuComplexMulKernel, CpuGemmLowpMatrixBReductionKernel, CpuGemmLowpOffsetContributionOutputStageKernel, CpuLogits1DSoftmaxKernel< IS_LOG >, CpuIm2ColKernel, CpuGemmTranspose1xWKernel, CpuWeightsReshapeKernel, CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel, CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel, CpuMulKernel, CpuCol2ImKernel, CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel, CpuGemmLowpQuantizeDownInt32ScaleKernel, CpuDepthwiseConv2dAssemblyWrapperKernel, CpuGemmLowpOffsetContributionKernel, CpuGemmInterleave4x4Kernel, CpuCastKernel, CpuDepthwiseConv2dNativeKernel, CpuGemmLowpMatrixMultiplyKernel, CpuAddKernel, CpuDirectConv2dOutputStageKernel, CpuGemmMatrixMultiplyKernel, CpuSubKernel, CpuGemmLowpMatrixAReductionKernel, CpuGemmMatrixAdditionKernel, CpuGemmAssemblyWrapperKernel< TypeInput, TypeOutput >, CpuConcatenateDepthKernel, CpuDirectConv2dKernel, CpuScaleKernel, CpuConvertFullyConnectedWeightsKernel, CpuFloorKernel, CpuElementwiseUnaryKernel, NELogicalKernel, CpuPool2dKernel, CpuQuantizeKernel, CpuActivationKernel, CpuConcatenateHeightKernel, CpuConcatenateWidthKernel, CpuConcatenateBatchKernel, CpuPermuteKernel, CpuCopyKernel, CpuReshapeKernel, CpuConvertQuantizedSignednessKernel, CpuDequantizeKernel, CpuLogits1DMaxKernel, CpuTransposeKernel, CpuElementwiseKernel, NEGatherKernel, CpuPool2dAssemblyWrapperKernel, NECol2ImKernel, CpuFillKernel, NETileKernel, NESelectKernel, NEReductionOperationKernel, NEFFTRadixStageKernel, NERangeKernel, CPPNonMaximumSuppressionKernel, NEBitwiseAndKernel, NEBitwiseNotKernel, NEBitwiseOrKernel, NEBitwiseXorKernel, NEMeanStdDevNormalizationKernel, NEMinMaxLayerKernel, 
NEStridedSliceKernel, CPPPermuteKernel, NECropKernel, NEFFTScaleKernel, NEFillBorderKernel, CPPUpsampleKernel, NEBatchNormalizationLayerKernel, NEFFTDigitReverseKernel, NESpaceToBatchLayerKernel, NEStackLayerKernel, CPPBoxWithNonMaximaSuppressionLimitKernel, NEChannelShuffleLayerKernel, NEDepthToSpaceLayerKernel, NEFuseBatchNormalizationKernel, NEInstanceNormalizationLayerKernel, NENormalizationLayerKernel, NEQLSTMLayerNormalizationKernel, NERemapKernel, NEReorgLayerKernel, NEROIAlignLayerKernel, NESpaceToDepthLayerKernel, CPPTopKVKernel, NEBatchToSpaceLayerKernel, NEBoundingBoxTransformKernel, NEL2NormalizeLayerKernel, NEMaxUnpoolingLayerKernel, NEPadLayerKernel, NEPriorBoxLayerKernel, NEReverseKernel, NEComputeAllAnchorsKernel, and NEROIPoolingLayerKernel.
Referenced by SchedulerClock< output_timestamps >::id(), and ICPPKernel::run_op().
|
inline virtual |
Execute the kernel on the passed window.
[in] | window | Region on which to execute the kernel. (Must be a region of the window returned by window()) |
[in] | info | Info about executing thread and CPU. |
Reimplemented in NESpaceToBatchLayerKernel, NEFuseBatchNormalizationKernel, NEBatchNormalizationLayerKernel, NECol2ImKernel, NECropKernel, NEBatchToSpaceLayerKernel, NEROIAlignLayerKernel, CPPNonMaximumSuppressionKernel, NEStackLayerKernel, NEBoundingBoxTransformKernel, NEFFTRadixStageKernel, NEReductionOperationKernel, NESelectKernel, NEFillBorderKernel, NEGatherKernel, NEMinMaxLayerKernel, NEL2NormalizeLayerKernel, NEPadLayerKernel, NEMeanStdDevNormalizationKernel, NEMaxUnpoolingLayerKernel, NENormalizationLayerKernel, NERangeKernel, NEFFTDigitReverseKernel, CPPPermuteKernel, CPPTopKVKernel, NEReorgLayerKernel, CpuGemmAssemblyWrapperKernel< TypeInput, TypeOutput >, NEFFTScaleKernel, NEComputeAllAnchorsKernel, NEInstanceNormalizationLayerKernel, NEQLSTMLayerNormalizationKernel, NEPriorBoxLayerKernel, NEChannelShuffleLayerKernel, NEDepthToSpaceLayerKernel, NEReverseKernel, NESpaceToDepthLayerKernel, NETileKernel, CPPBoxWithNonMaximaSuppressionLimitKernel, NEROIPoolingLayerKernel, NERemapKernel, NEBitwiseAndKernel, NEBitwiseOrKernel, NEBitwiseXorKernel, CPPUpsampleKernel, and NEBitwiseNotKernel.
Definition at line 55 of file ICPPKernel.h.
References ARM_COMPUTE_ERROR, and ARM_COMPUTE_UNUSED.
Referenced by IScheduler::num_threads_hint(), ICPPKernel::run_nd(), and SingleThreadScheduler::schedule().
|
inline virtual |
Legacy compatibility layer for implementations which do not support thread_locator. In these cases we simply narrow the interface down to the legacy version.
[in] | window | Region on which to execute the kernel. (Must be a region of the window returned by window()) |
[in] | info | Info about executing thread and CPU. |
[in] | thread_locator | Specifies "where" the current thread is in the multi-dimensional space |
Reimplemented in CpuGemmAssemblyWrapperKernel< TypeInput, TypeOutput >.
Definition at line 68 of file ICPPKernel.h.
References ARM_COMPUTE_UNUSED, and ICPPKernel::run().
Referenced by IScheduler::num_threads_hint().
|
inline virtual |
Execute the kernel on the passed window.
[in] | tensors | A vector containing the tensors to operate on. |
[in] | window | Region on which to execute the kernel. (Must be a region of the window returned by window()) |
[in] | info | Info about executing thread and CPU. |
Reimplemented in CpuWinogradConv2dTransformWeightsKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuWinogradConv2dTransformOutputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuWinogradConv2dTransformInputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >, CpuComplexMulKernel, CpuGemmLowpMatrixBReductionKernel, CpuGemmLowpOffsetContributionOutputStageKernel, CpuLogits1DSoftmaxKernel< IS_LOG >, NEStridedSliceKernel, CpuIm2ColKernel, CpuGemmTranspose1xWKernel, CpuWeightsReshapeKernel, NEFillBorderKernel, CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel, CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel, CpuMulKernel, CpuPool2dAssemblyWrapperKernel, CpuCol2ImKernel, CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel, CpuGemmLowpQuantizeDownInt32ScaleKernel, CpuDepthwiseConv2dAssemblyWrapperKernel, CpuGemmLowpOffsetContributionKernel, CpuGemmInterleave4x4Kernel, CpuCastKernel, CpuDepthwiseConv2dNativeKernel, CpuGemmLowpMatrixMultiplyKernel, CpuAddKernel, CpuDirectConv2dOutputStageKernel, CpuGemmMatrixMultiplyKernel, CpuSubKernel, CpuGemmLowpMatrixAReductionKernel, CpuGemmMatrixAdditionKernel, CpuConcatenateDepthKernel, CpuDirectConv2dKernel, CpuScaleKernel, CpuConvertFullyConnectedWeightsKernel, CpuFloorKernel, CpuElementwiseUnaryKernel, NELogicalKernel, CpuPool2dKernel, CpuQuantizeKernel, CpuActivationKernel, CpuConcatenateHeightKernel, CpuConcatenateWidthKernel, CpuConcatenateBatchKernel, CpuPermuteKernel, CpuCopyKernel, CpuReshapeKernel, CpuConvertQuantizedSignednessKernel, CpuDequantizeKernel, CpuLogits1DMaxKernel, CpuTransposeKernel, CpuElementwiseKernel, and CpuFillKernel.
Definition at line 86 of file ICPPKernel.h.
References ARM_COMPUTE_UNUSED, and ICPPKernel::name().
Referenced by CpuWinogradConv2dTransformInputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >::name(), CpuWinogradConv2dTransformOutputKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >::name(), CpuWinogradConv2dTransformWeightsKernel< T, OutputTileRows, OutputTileCols, KernelRows, KernelCols >::name(), IScheduler::num_threads_hint(), SingleThreadScheduler::schedule_op(), and OMPScheduler::schedule_op().