21.02
|
CLKernelLibrary class. More...
#include <CLKernelLibrary.h>
Public Member Functions | |
void | init (std::string kernel_path, cl::Context context, cl::Device device) |
Initialises the kernel library. More... | |
void | set_kernel_path (const std::string &kernel_path) |
Sets the path that the kernels reside in. More... | |
std::string | get_kernel_path () |
Gets the path that the kernels reside in. More... | |
std::pair< std::string, bool > | get_program (const std::string &program_name) const |
Gets the source of the selected program. More... | |
cl::Context & | context () |
Accessor for the associated CL context. More... | |
const cl::Device & | get_device () |
Gets the CL device for which the programs are created. More... | |
void | set_device (cl::Device device) |
Sets the CL device for which the programs are created. More... | |
std::string | get_device_version () |
Return the device version. More... | |
cl_uint | get_num_compute_units () |
Return the maximum number of compute units in the device. More... | |
Kernel | create_kernel (const std::string &kernel_name, const std::set< std::string > &build_options_set={}) const |
Creates a kernel from the kernel library. More... | |
size_t | max_local_workgroup_size (const cl::Kernel &kernel) const |
Find the maximum number of local work items in a workgroup that can be supported for the kernel. More... | |
cl::NDRange | default_ndrange () const |
Return the default NDRange for the device. More... | |
void | clear_programs_cache () |
Clear the library's cache of binary programs. More... | |
const std::map< std::string, cl::Program > & | get_built_programs () const |
Access the cache of built OpenCL programs. More... | |
void | add_built_program (const std::string &built_program_name, const cl::Program &program) |
Add a new built program to the cache. More... | |
bool | fp16_supported () const |
Returns true if FP16 is supported by the CL device. More... | |
bool | int64_base_atomics_supported () const |
Returns true if int64_base_atomics extension is supported by the CL device. More... | |
std::string | get_program_name (const std::string &kernel_name) const |
Returns the program name given a kernel name. More... | |
bool | is_wbsm_supported () |
void | set_context (cl::Context context) |
Sets the CL context used to create programs. More... | |
CLCompileContext & | get_compile_context () |
Gets the compile context used. More... | |
Static Public Member Functions | |
static CLKernelLibrary & | get () |
Access the KernelLibrary singleton. More... | |
CLKernelLibrary class.
Definition at line 38 of file CLKernelLibrary.h.
void add_built_program | ( | const std::string & | built_program_name, |
const cl::Program & | program | ||
) |
Add a new built program to the cache.
[in] | built_program_name | Name of the program |
[in] | program | Built program to add to the cache |
Definition at line 1194 of file CLKernelLibrary.cpp.
References CLCompileContext::add_built_program().
Referenced by arm_compute::restore_program_cache_from_file().
void clear_programs_cache | ( | ) |
Clear the library's cache of binary programs.
Definition at line 1184 of file CLKernelLibrary.cpp.
References CLCompileContext::clear_programs_cache().
Referenced by Framework::run().
cl::Context & context | ( | ) |
Accessor for the associated CL context.
Definition at line 1159 of file CLKernelLibrary.cpp.
References CLCompileContext::context().
Referenced by CLScheduler::context().
Kernel create_kernel | ( | const std::string & | kernel_name, |
const std::set< std::string > & | build_options_set = {} | ||
) | const |
Creates a kernel from the kernel library.
Definition at line 1125 of file CLKernelLibrary.cpp.
References CLCompileContext::create_kernel(), CLKernelLibrary::get_program(), and CLKernelLibrary::get_program_name().
Referenced by arm_compute::create_opencl_kernel().
cl::NDRange default_ndrange | ( | ) | const |
Return the default NDRange for the device.
Definition at line 1270 of file CLKernelLibrary.cpp.
References CLCompileContext::default_ndrange().
Referenced by CLReductionOperationKernel::configure(), and CLArgMinMaxLayerKernel::configure().
bool fp16_supported | ( | ) | const |
Returns true if FP16 is supported by the CL device.
Definition at line 1199 of file CLKernelLibrary.cpp.
References CLCompileContext::fp16_supported().
static CLKernelLibrary & get | ( | ) |
Access the KernelLibrary singleton.
This method has been deprecated and will be removed in future releases.
Definition at line 1119 of file CLKernelLibrary.cpp.
Referenced by CLRuntimeContext::CLRuntimeContext(), CLGaussian5x5HorKernel::configure(), CLIntegralImageHorKernel::configure(), CLWarpPerspectiveKernel::configure(), CLTableLookupKernel::configure(), CLGEMMDefaultConfigNativeValhall::configure(), CLGEMMDefaultConfigNativeBifrost::configure(), CLGEMMDefaultConfigReshapedRHSOnlyBifrost::configure(), CLGEMMDefaultConfigReshapedBifrost::configure(), CLTableLookup::configure(), CLBox3x3Kernel::configure(), CLDilateKernel::configure(), CLErodeKernel::configure(), CLMedian3x3Kernel::configure(), CLGaussian3x3Kernel::configure(), CLScaleKernel::configure(), CLThresholdKernel::configure(), CLTransposeKernel::configure(), CLWarpAffineKernel::configure(), CLReverse::configure(), CLTile::configure(), CLDepthToSpaceLayer::configure(), CLNonMaximaSuppression3x3Kernel::configure(), CLBitwiseNot::configure(), CLFillBorder::configure(), CLAccumulateKernel::configure(), CLMeanStdDevNormalizationLayer::configure(), CLSelect::configure(), CLBitwiseAnd::configure(), CLBitwiseOr::configure(), CLBitwiseXor::configure(), CLDequantizationLayer::configure(), CLGather::configure(), CLReorgLayer::configure(), ClFillKernel::configure(), CLTranspose::configure(), CLAccumulate::configure(), CLPhase::configure(), CLComparison::configure(), CLMagnitude::configure(), CLPriorBoxLayer::configure(), CLRange::configure(), CLAbsoluteDifference::configure(), CLChannelExtract::configure(), CLChannelShuffleLayer::configure(), CLChannelCombine::configure(), CLConvertFullyConnectedWeights::configure(), CLNonLinearFilterKernel::configure(), CLFlattenLayer::configure(), CLQuantizationLayer::configure(), CLThreshold::configure(), CLColorConvert::configure(), CLLKTrackerInitKernel::configure(), CLFullyConnectedLayerReshapeWeights::configure(), CLNormalizePlanarYUVLayer::configure(), ClPermuteKernel::configure(), CLWarpPerspective::configure(), CLDequantizationLayerKernel::configure(), CLDepthwiseConvolutionLayerReshapeWeightsKernel::configure(), 
CLMinMaxLayerKernel::configure(), CLGradientKernel::configure(), CLMedian3x3::configure(), CLReduceMean::configure(), ClCropKernel::configure(), CLWarpAffine::configure(), CLBox3x3::configure(), CLDilate::configure(), CLErode::configure(), CLReverseKernel::configure(), CLGaussian3x3::configure(), CLChannelShuffleLayerKernel::configure(), CLGaussianPyramidHorKernel::configure(), CLNonMaximaSuppression3x3::configure(), CLSpaceToDepthLayerKernel::configure(), CLComputeAllAnchorsKernel::configure(), CLDepthToSpaceLayerKernel::configure(), CLHistogramKernel::configure(), CLBoundingBoxTransform::configure(), CLBatchToSpaceLayerKernel::configure(), CLNormalizationLayerKernel::configure(), CLInstanceNormalizationLayer::configure(), CLSpaceToBatchLayerKernel::configure(), CLTileKernel::configure(), CLDeconvolutionLayerUpsampleKernel::configure(), CLUnstack::configure(), CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(), CLDeconvolutionLayer::configure(), CLMinMaxKernel::configure(), CLQLSTMLayerNormalizationKernel::configure(), CLGatherKernel::configure(), CLReorgLayerKernel::configure(), CLRemapKernel::configure(), CLComparisonKernel::configure(), CLReshapeLayer::configure(), CLROIPoolingLayer::configure(), CLScharr3x3::configure(), CLDepthConvertLayerKernel::configure(), CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(), CLDerivativeKernel::configure(), CLMeanStdDevNormalizationKernel::configure(), CLFFTScaleKernel::configure(), CLSobel3x3Kernel::configure(), CLRemap::configure(), CLWinogradInputTransform::configure(), CLHOGOrientationBinningKernel::configure(), CLDerivative::configure(), CLFFTDigitReverseKernel::configure(), CLNormalizePlanarYUVLayerKernel::configure(), CLFill::configure(), CLPadLayerKernel::configure(), CLQuantizationLayerKernel::configure(), CLHistogram::configure(), CLSobel5x5HorKernel::configure(), CLSobel7x7HorKernel::configure(), CLLogits1DMaxShiftExpSumKernel::configure(), CLAbsoluteDifferenceKernel::configure(), 
CLGEMMLowpMatrixMultiplyNativeKernel::configure(), CLROIAlignLayer::configure(), CLCast::configure(), CLInstanceNormalizationLayerKernel::configure(), CLCopy::configure(), CLRsqrtLayer::configure(), CLFloor::configure(), CLPriorBoxLayerKernel::configure(), CLChannelExtractKernel::configure(), CLNonLinearFilter::configure(), CLColorConvertKernel::configure(), CLSpaceToDepthLayer::configure(), CLBoundingBoxTransformKernel::configure(), CLL2NormalizeLayerKernel::configure(), CLDepthConvertLayer::configure(), CLMagnitudePhaseKernel::configure(), CLMeanStdDevKernel::configure(), CLRangeKernel::configure(), CLReductionOperationKernel::configure(), CLPermute::configure(), CLRNNLayer::configure(), CLBatchToSpaceLayer::configure(), CLPoolingLayer::configure(), CLFFTRadixStageKernel::configure(), CLIntegralImage::configure(), CLChannelCombineKernel::configure(), CLConvertFullyConnectedWeightsKernel::configure(), CLStackLayerKernel::configure(), CLGEMMReshapeLHSMatrixKernel::configure(), CLConvolutionKernel< matrix_size >::configure(), CLMeanStdDev::configure(), CLWinogradInputTransformKernel::configure(), CLLogicalNot::configure(), CLGEMMMatrixMultiplyNativeKernel::configure(), CLFFT2D::configure(), CLFuseBatchNormalizationKernel::configure(), CLHOGGradient::configure(), CLROIPoolingLayerKernel::configure(), CLSobel3x3::configure(), CLWinogradFilterTransformKernel::configure(), CLBatchNormalizationLayerKernel::configure(), CLCrop::configure(), CLROIAlignLayerKernel::configure(), CLGEMMLowpMatrixMultiplyReshapedKernel::configure(), CLScale::configure(), CLArgMinMaxLayerKernel::configure(), CLHarrisScoreKernel::configure(), CLDirectConvolutionLayer::configure(), CLEqualizeHistogram::configure(), CLSpaceToBatchLayer::configure(), CLWinogradOutputTransformKernel::configure(), CLConvolution3x3::configure(), CLDepthwiseConvolutionLayerNativeKernel::configure(), CLDeconvolutionLayerUpsample::configure(), CLFFT1D::configure(), CLFastCornersKernel::configure(), 
CLFillBorderKernel::configure(), CLGaussian5x5VertKernel::configure(), CLMaxUnpoolingLayer::configure(), CLStackLayer::configure(), CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::configure(), CLActivationLayer::configure(), CLHOGDetectorKernel::configure(), CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(), CLStridedSlice::configure(), CLGEMMMatrixMultiplyKernel::configure(), CLDeconvolutionReshapeOutputKernel::configure(), CLDirectConvolutionLayerKernel::configure(), CLPadLayer::configure(), CLHOGDescriptor::configure(), CLIntegralImageVertKernel::configure(), CLNormalizationLayer::configure(), CLFuseBatchNormalization::configure(), CLGEMMLowpOffsetContributionKernel::configure(), CLGEMMLowpOffsetContributionOutputStageKernel::configure(), CLReductionOperation::configure(), CLDepthwiseConvolutionLayer::configure(), CLScharr3x3Kernel::configure(), CLAccumulateWeighted::configure(), CLArgMinMaxLayer::configure(), CLL2NormalizeLayer::configure(), CLLaplacianPyramid::configure(), CLCol2ImKernel::configure(), CLGEMMReshapeRHSMatrixKernel::configure(), CLPixelWiseMultiplicationKernel::configure(), CLConvolutionLayerReshapeWeights::configure(), CLMinMaxLocation::configure(), CLConcatenateLayer::configure(), CLHOGDetector::configure(), CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(), CLAccumulateWeightedKernel::configure(), CLBatchNormalizationLayer::configure(), CLSobel7x7::configure(), CLCannyEdge::configure(), CLCropResize::configure(), CLSobel5x5::configure(), CLFastCorners::configure(), CLGaussian5x5::configure(), CLGEMMLowpMatrixMultiplyCore::configure(), CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(), CLWinogradConvolutionLayer::configure(), CLArithmeticAddition::configure(), CLWeightsReshapeKernel::configure(), CLSoftmaxLayerGeneric< IS_LOG >::configure(), CLLaplacianReconstruct::configure(), CLGEMMReshapeRHSMatrixKernelManaged::configure(), CLIm2ColKernel::configure(), CLLKTrackerFinalizeKernel::configure(), 
CLGEMMMatrixMultiplyReshapedKernel::configure(), CLHarrisCorners::configure(), CLFFTConvolutionLayer::configure(), CLComparisonStatic< COP >::configure(), CLHOGMultiDetection::configure(), CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(), CLGenerateProposalsLayer::configure(), CLLogicalOr::configure(), CLLogicalAnd::configure(), CLHistogramBorderKernel::configure(), CLGaussianPyramidVertKernel::configure(), CLLSTMLayerQuantized::configure(), CLGEMMLowpMatrixAReductionKernel::configure(), CLAccumulateSquared::configure(), CLOpticalFlow::configure(), CLPReluLayer::configure(), CLDirectDeconvolutionLayer::configure(), CLMinMaxLocationKernel::configure(), CLEdgeNonMaxSuppressionKernel::configure(), CLAccumulateSquaredKernel::configure(), CLGEMMDeconvolutionLayer::configure(), CLHOGBlockNormalizationKernel::configure(), CLSlice::configure(), CLConvolutionLayer::configure(), CLSeparableConvolutionHorKernel< matrix_size >::configure(), CLExpLayer::configure(), CLFullyConnectedLayerReshapeWeightsManaged::configure(), CLLSTMLayer::configure(), CLGaussianPyramidHalf::configure(), CLConvertFullyConnectedWeightsManaged::configure(), CLSobel5x5VertKernel::configure(), CLSobel7x7VertKernel::configure(), CLCopyToArrayKernel::configure(), CLQLSTMLayer::configure(), CLConvolutionLayerReshapeWeightsTransform::configure(), CLConvolutionSquare< matrix_size >::configure(), CLLKTrackerStage0Kernel::configure(), CLLogits1DNormKernel::configure(), CLGaussianPyramidOrb::configure(), CLSeparableConvolutionVertKernel< matrix_size >::configure(), CLGEMMLowpMatrixBReductionKernel::configure(), CLGEMM::configure(), CLEdgeTraceKernel::configure(), CLNegLayer::configure(), CLFullyConnectedLayer::configure(), CLComplexPixelWiseMultiplicationKernel::configure(), CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(), CLConvolutionRectangle::configure(), CLLKTrackerStage1Kernel::configure(), CLArithmeticSubtraction::configure(), CLPixelWiseMultiplication::configure(), 
CLSinLayer::configure(), CLConvolutionRectangleKernel::configure(), CLGEMMConvolutionLayer::configure(), CLLogLayer::configure(), CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(), CLArithmeticDivision::configure(), CLComplexPixelWiseMultiplication::configure(), CLAbsLayer::configure(), CLGEMMLowpOutputStage::configure(), CLRoundLayer::configure(), CLElementwiseMax::configure(), CLElementwiseMin::configure(), CLElementwiseSquaredDiff::configure(), CLElementwisePower::configure(), CLScheduler::context(), arm_compute::create_kernel(), arm_compute::create_opencl_kernel(), arm_compute::test::validation::DATA_TEST_CASE(), CLScheduler::default_init(), CLScheduler::default_init_with_context(), arm_compute::error_on_unsupported_int64_base_atomics(), ICLKernel::get_max_workgroup_size(), ICLKernel::get_target(), CLRuntimeContext::kernel_library(), main(), arm_compute::restore_program_cache_from_file(), CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::run(), Framework::run(), arm_compute::utils::run_example(), arm_compute::save_program_cache_to_file(), CLScheduler::set_context(), arm_compute::test::validation::TEST_CASE(), OpenCLMemoryUsage::test_measurements(), CLTuner::tune_kernel_dynamic(), examples::gemm_tuner_helpers::update_padding_for_cl_image(), arm_compute::cl_gemm::update_padding_for_cl_image(), and arm_compute::cl_gemm::validate_image2d_support_on_rhs().
const std::map< std::string, cl::Program > & get_built_programs | ( | ) | const |
Access the cache of built OpenCL programs.
Definition at line 1189 of file CLKernelLibrary.cpp.
References CLCompileContext::get_built_programs().
Referenced by arm_compute::save_program_cache_to_file(), and OpenCLMemoryUsage::test_measurements().
CLCompileContext & get_compile_context | ( | ) |
Gets the compile context used.
Definition at line 1285 of file CLKernelLibrary.cpp.
const cl::Device & get_device | ( | ) |
Gets the CL device for which the programs are created.
Definition at line 1164 of file CLKernelLibrary.cpp.
References CLCompileContext::get_device().
Referenced by arm_compute::test::validation::DATA_TEST_CASE(), arm_compute::test::validation::TEST_CASE(), and arm_compute::cl_gemm::validate_image2d_support_on_rhs().
std::string get_device_version | ( | ) |
Return the device version.
Definition at line 1275 of file CLKernelLibrary.cpp.
References CLCompileContext::get_device_version().
std::string get_kernel_path | ( | ) |
Gets the path that the kernels reside in.
Definition at line 1179 of file CLKernelLibrary.cpp.
Referenced by arm_compute::create_kernel(), and arm_compute::test::validation::TEST_CASE().
cl_uint get_num_compute_units | ( | ) |
Return the maximum number of compute units in the device.
Definition at line 1280 of file CLKernelLibrary.cpp.
References CLCompileContext::get_num_compute_units().
Referenced by CLTuner::tune_kernel_dynamic().
std::pair< std::string, bool > get_program | ( | const std::string & | program_name | ) | const |
Gets the source of the selected program.
[in] | program_name | Program name. |
Definition at line 1214 of file CLKernelLibrary.cpp.
References ARM_COMPUTE_ERROR_ON_MSG, ARM_COMPUTE_ERROR_VAR, and arm_compute::read_file().
Referenced by CLKernelLibrary::create_kernel(), arm_compute::create_kernel(), and arm_compute::test::validation::TEST_CASE().
std::string get_program_name | ( | const std::string & | kernel_name | ) | const |
Returns the program name given a kernel name.
Definition at line 1133 of file CLKernelLibrary.cpp.
References ARM_COMPUTE_ERROR_VAR.
Referenced by CLKernelLibrary::create_kernel(), arm_compute::create_kernel(), and arm_compute::test::validation::TEST_CASE().
void init | ( | std::string | kernel_path, |
cl::Context | context, | ||
cl::Device | device | ||
) |
Initialises the kernel library.
[in] | kernel_path | Path of the directory from which kernel sources are loaded. |
[in] | context | CL context used to create programs. |
[in] | device | CL device for which the programs are created. |
Definition at line 1148 of file CLKernelLibrary.cpp.
Referenced by CLRuntimeContext::CLRuntimeContext(), CLScheduler::default_init(), and CLScheduler::default_init_with_context().
bool int64_base_atomics_supported | ( | ) | const |
Returns true if int64_base_atomics extension is supported by the CL device.
Definition at line 1204 of file CLKernelLibrary.cpp.
References CLCompileContext::int64_base_atomics_supported().
bool is_wbsm_supported | ( | ) |
Definition at line 1209 of file CLKernelLibrary.cpp.
References CLCompileContext::is_wbsm_supported().
size_t max_local_workgroup_size | ( | const cl::Kernel & | kernel | ) | const |
Find the maximum number of local work items in a workgroup that can be supported for the kernel.
Definition at line 1265 of file CLKernelLibrary.cpp.
References CLCompileContext::max_local_workgroup_size().
Referenced by ICLKernel::get_max_workgroup_size().
void set_context | ( | cl::Context | context | ) |
Sets the CL context used to create programs.
[in] | context | A CL context. |
Definition at line 1174 of file CLKernelLibrary.cpp.
References CLCompileContext::set_context().
Referenced by CLScheduler::set_context().
void set_device | ( | cl::Device | device | ) |
Sets the CL device for which the programs are created.
[in] | device | A CL device. |
Definition at line 1169 of file CLKernelLibrary.cpp.
References CLCompileContext::set_device().
void set_kernel_path | ( | const std::string & | kernel_path | ) |
Sets the path that the kernels reside in.
[in] | kernel_path | Path of the directory that the kernels reside in. |
Definition at line 1154 of file CLKernelLibrary.cpp.