23.08
|
CLKernelLibrary class. More...
#include <CLKernelLibrary.h>
Public Member Functions | |
void | init (std::string kernel_path, cl::Context context, cl::Device device) |
Initialises the kernel library. More... | |
void | set_kernel_path (const std::string &kernel_path) |
Sets the path that the kernels reside in. More... | |
std::string | get_kernel_path () |
Gets the path that the kernels reside in. More... | |
std::pair< std::string, bool > | get_program (const std::string &program_name) const |
Gets the source of the selected program. More... | |
cl::Context & | context () |
Accessor for the associated CL context. More... | |
const cl::Device & | get_device () |
Gets the CL device for which the programs are created. More... | |
void | set_device (cl::Device device) |
Sets the CL device for which the programs are created. More... | |
std::string | get_device_version () |
Return the device version. More... | |
cl_uint | get_num_compute_units () |
Return the maximum number of compute units in the device. More... | |
Kernel | create_kernel (const std::string &kernel_name, const std::set< std::string > &build_options_set={}) const |
Creates a kernel from the kernel library. More... | |
size_t | max_local_workgroup_size (const cl::Kernel &kernel) const |
Find the maximum number of local work items in a workgroup that can be supported for the kernel. More... | |
cl::NDRange | default_ndrange () const |
Return the default NDRange for the device. More... | |
void | clear_programs_cache () |
Clear the library's cache of binary programs. More... | |
const std::map< std::string, cl::Program > & | get_built_programs () const |
Access the cache of built OpenCL programs. More... | |
void | add_built_program (const std::string &built_program_name, const cl::Program &program) |
Add a new built program to the cache. More... | |
bool | fp16_supported () const |
Returns true if FP16 is supported by the CL device. More... | |
bool | int64_base_atomics_supported () const |
Returns true if int64_base_atomics extension is supported by the CL device. More... | |
std::string | get_program_name (const std::string &kernel_name) const |
Returns the program name given a kernel name. More... | |
bool | is_wbsm_supported () |
void | set_context (cl::Context context) |
Sets the CL context used to create programs. More... | |
CLCompileContext & | get_compile_context () |
Gets the compile context used. More... | |
Static Public Member Functions | |
static CLKernelLibrary & | get () |
Access the KernelLibrary singleton. More... | |
CLKernelLibrary class.
Definition at line 38 of file CLKernelLibrary.h.
void add_built_program | ( | const std::string & | built_program_name, |
const cl::Program & | program | ||
) |
Add a new built program to the cache.
[in] | built_program_name | Name of the program |
[in] | program | Built program to add to the cache |
Definition at line 93 of file CLKernelLibrary.cpp.
Referenced by arm_compute::restore_program_cache_from_file().
void clear_programs_cache | ( | ) |
Clear the library's cache of binary programs.
Definition at line 85 of file CLKernelLibrary.cpp.
Referenced by Framework::run().
cl::Context & context | ( | ) |
Accessor for the associated CL context.
Definition at line 65 of file CLKernelLibrary.cpp.
Referenced by ClContext::ClContext(), CLScheduler::context(), and arm_compute::create_image2d_from_tensor().
Kernel create_kernel | ( | const std::string & | kernel_name, |
const std::set< std::string > & | build_options_set = {} | ||
) | const |
Creates a kernel from the kernel library.
Definition at line 44 of file CLKernelLibrary.cpp.
References kernel_name, ClKernelLibrary::program(), and ClKernelLibrary::program_name().
cl::NDRange default_ndrange | ( | ) | const |
Return the default NDRange for the device.
Definition at line 118 of file CLKernelLibrary.cpp.
Referenced by arm_compute::get_default_lws_for_type().
bool fp16_supported | ( | ) | const |
Returns true if FP16 is supported by the CL device.
Definition at line 97 of file CLKernelLibrary.cpp.
static CLKernelLibrary & get | ( | ) |
static |
Access the KernelLibrary singleton.
This method has been deprecated and will be removed in future releases.
Definition at line 39 of file CLKernelLibrary.cpp.
Referenced by ClContext::ClContext(), CLRuntimeContext::CLRuntimeContext(), CLNormalizePlanarYUVLayer::configure(), CLReverse::configure(), CLBitwiseNot::configure(), CLTile::configure(), CLSynthetizeOperator< ClGemmMatrixMultiplyReshapedOnlyRhsKernel >::configure(), CLReverseKernel::configure(), CLChannelShuffleLayerKernel::configure(), CLDepthToSpaceLayer::configure(), CLDepthToSpaceLayerKernel::configure(), CLSelect::configure(), CLSpaceToDepthLayerKernel::configure(), CLBitwiseAnd::configure(), CLComputeAllAnchorsKernel::configure(), CLBitwiseXor::configure(), CLBitwiseOr::configure(), CLNormalizationLayerKernel::configure(), CLGather::configure(), CLReorgLayerKernel::configure(), CLQLSTMLayerNormalizationKernel::configure(), CLSpaceToBatchLayerKernel::configure(), CLTileKernel::configure(), CLDeconvolutionLayerUpsampleKernel::configure(), CLGatherKernel::configure(), CLMeanStdDevNormalizationLayer::configure(), CLReorgLayer::configure(), CLFFTScaleKernel::configure(), ClGemmLowpMatrixMultiplyNativeKernel::configure(), CLComparison::configure(), CLBatchToSpaceLayerKernel::configure(), CLMeanStdDevNormalizationKernel::configure(), CLComparisonKernel::configure(), CLFFTDigitReverseKernel::configure(), CLNormalizePlanarYUVLayerKernel::configure(), CLPadLayerKernel::configure(), CLReductionOperationKernel::configure(), CLPriorBoxLayerKernel::configure(), CLPriorBoxLayer::configure(), CLChannelShuffleLayer::configure(), CLRangeKernel::configure(), CLBoundingBoxTransformKernel::configure(), ClGemmMatrixMultiplyNativeKernel::configure(), CLL2NormalizeLayerKernel::configure(), CLROIPoolingLayerKernel::configure(), CLFFTRadixStageKernel::configure(), CLStackLayerKernel::configure(), CLROIAlignLayerKernel::configure(), CLArgMinMaxLayerKernel::configure(), CLUnstack::configure(), CLFuseBatchNormalizationKernel::configure(), CLReduceMean::configure(), CLReshapeLayer::configure(), CLRange::configure(), CLTranspose::configure(), CLBatchNormalizationLayerKernel::configure(), 
ClGemmLowpMatrixMultiplyReshapedKernel::configure(), CLBoundingBoxTransform::configure(), CLFlattenLayer::configure(), CLCopy::configure(), CLFill::configure(), CLFillBorderKernel::configure(), CLFloor::configure(), CLROIPoolingLayer::configure(), CLRsqrtLayer::configure(), CLDeconvolutionReshapeOutputKernel::configure(), CLSpaceToDepthLayer::configure(), CLLogicalAnd::configure(), CLPermute::configure(), CLLogicalOr::configure(), CLROIAlignLayer::configure(), CLConvertFullyConnectedWeights::configure(), CLLogicalNot::configure(), CLRNNLayer::configure(), CLPReluLayer::configure(), CLSlice::configure(), ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel::configure(), CLDeconvolutionLayer::configure(), CLDequantizationLayer::configure(), CLCrop::configure(), CLBatchToSpaceLayer::configure(), CLFFT2D::configure(), CLPooling3dLayer::configure(), CLScale::configure(), CLSpaceToBatchLayer::configure(), CLStackLayer::configure(), CLStridedSlice::configure(), ClGemmLowpMatrixAReductionKernel::configure(), CLDeconvolutionLayerUpsample::configure(), ClGemmMatrixMultiplyReshapedOnlyRhsKernel::configure(), CLFFT1D::configure(), CLPoolingLayer::configure(), CLIndirectConvolutionLayer::configure(), CLQuantizationLayer::configure(), CLDepthConvertLayer::configure(), CLPadLayer::configure(), CLSoftmaxLayerGeneric< IS_LOG >::configure(), CLCast::configure(), CLMaxUnpoolingLayer::configure(), CLNormalizationLayer::configure(), CLFuseBatchNormalization::configure(), CLDirectConvolutionLayer::configure(), CLReductionOperation::configure(), CLL2NormalizeLayer::configure(), CLActivationLayer::configure(), CLDepthwiseConvolutionLayerNativeKernel::configure(), CLArgMinMaxLayer::configure(), CLArithmeticAddition::configure(), CLCropResize::configure(), CLInstanceNormalizationLayer::configure(), CLConv3D::configure(), CLConcatenateLayer::configure(), ClGemmMatrixMultiplyReshapedKernel::configure(), CLPixelWiseMultiplication::configure(), CLWinogradConvolutionLayer::configure(), 
CLBatchNormalizationLayer::configure(), CLGEMMLowpOutputStage::configure(), CLFullyConnectedLayer::configure(), CLGEMMLowpMatrixMultiplyCore::configure(), CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::configure(), CLFFTConvolutionLayer::configure(), CLMatMul::configure(), CLComparisonStatic< COP >::configure(), CLDepthwiseConvolutionLayer::configure(), CLGEMMConvolutionLayer::configure(), CLGEMM::configure(), CLLSTMLayerQuantized::configure(), CLGenerateProposalsLayer::configure(), CLDirectDeconvolutionLayer::configure(), CLGEMMDeconvolutionLayer::configure(), CLLSTMLayer::configure(), CLExpLayer::configure(), CLConvolutionLayer::configure(), CLQLSTMLayer::configure(), CLConvertFullyConnectedWeightsManaged::configure(), CLComplexPixelWiseMultiplication::configure(), CLNegLayer::configure(), CLArithmeticSubtraction::configure(), ClSynthetizeOperatorWithBorder< K >::configure(), CLSinLayer::configure(), CLLogLayer::configure(), CLArithmeticDivision::configure(), CLAbsLayer::configure(), CLElementwiseMax::configure(), CLRoundLayer::configure(), CLElementwiseMin::configure(), CLElementwiseSquaredDiff::configure(), CLElementwisePower::configure(), CLScheduler::context(), ClContext::create_activation(), arm_compute::create_image2d_from_buffer(), arm_compute::create_image2d_from_tensor(), CLScheduler::default_init(), CLScheduler::default_init_with_context(), arm_compute::error_on_unsupported_int64_base_atomics(), arm_compute::export_to_cl_image(), arm_compute::get_default_lws_for_type(), ICLKernel::get_max_workgroup_size(), CLRuntimeContext::kernel_library(), main(), arm_compute::restore_program_cache_from_file(), CLDepthwiseConvolutionLayerNativeKernel::run(), Framework::run(), ClMatMulNativeKernel::run_op(), ClIndirectConv2dKernel::run_op(), ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel::run_op(), ClGemmMatrixMultiplyReshapedOnlyRhsKernel::run_op(), ClGemmMatrixMultiplyReshapedKernel::run_op(), 
arm_compute::save_program_cache_to_file(), CLScheduler::set_context(), arm_compute::test::validation::TEST_CASE(), OpenCLMemoryUsage::test_measurements(), examples::gemm_tuner_helpers::update_padding_for_cl_image(), arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(), ClMatMulNativeMMULKernel::validate(), and arm_compute::opencl::kernels::gemm::validate_image2d_support_on_rhs().
const std::map< std::string, cl::Program > & get_built_programs | ( | ) | const |
Access the cache of built OpenCL programs.
Definition at line 89 of file CLKernelLibrary.cpp.
Referenced by OpenCLMemoryUsage::test_measurements().
CLCompileContext & get_compile_context | ( | ) |
Gets the compile context used.
Definition at line 130 of file CLKernelLibrary.cpp.
Referenced by CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::configure(), and arm_compute::test::validation::TEST_CASE().
const cl::Device & get_device | ( | ) |
Gets the CL device for which the programs are created.
Definition at line 69 of file CLKernelLibrary.cpp.
Referenced by ClContext::ClContext(), arm_compute::create_image2d_from_tensor(), arm_compute::export_to_cl_image(), arm_compute::test::validation::TEST_CASE(), and arm_compute::opencl::kernels::gemm::validate_image2d_support_on_rhs().
std::string get_device_version | ( | ) |
Return the device version.
Definition at line 122 of file CLKernelLibrary.cpp.
std::string get_kernel_path | ( | ) |
Gets the path that the kernels reside in.
Definition at line 81 of file CLKernelLibrary.cpp.
Referenced by arm_compute::test::validation::TEST_CASE().
cl_uint get_num_compute_units | ( | ) |
Return the maximum number of compute units in the device.
Definition at line 126 of file CLKernelLibrary.cpp.
std::pair< std::string, bool > get_program | ( | const std::string & | program_name | ) | const |
Gets the source of the selected program.
[in] | program_name | Program name. |
Definition at line 109 of file CLKernelLibrary.cpp.
Referenced by arm_compute::test::validation::TEST_CASE().
std::string get_program_name | ( | const std::string & | kernel_name | ) | const |
Returns the program name given a kernel name.
Definition at line 52 of file CLKernelLibrary.cpp.
References kernel_name.
Referenced by arm_compute::test::validation::TEST_CASE().
void init | ( | std::string | kernel_path, |
cl::Context | context, | ||
cl::Device | device | ||
) |
Initialises the kernel library.
[in] | kernel_path | Path of the directory from which kernel sources are loaded. |
[in] | context | CL context used to create programs. |
[in] | device | CL device for which the programs are created. |
Definition at line 56 of file CLKernelLibrary.cpp.
References arm_compute::test::validation::context.
Referenced by CLRuntimeContext::CLRuntimeContext(), CLScheduler::default_init(), and CLScheduler::default_init_with_context().
bool int64_base_atomics_supported | ( | ) | const |
Returns true if int64_base_atomics extension is supported by the CL device.
Definition at line 101 of file CLKernelLibrary.cpp.
bool is_wbsm_supported | ( | ) |
Definition at line 105 of file CLKernelLibrary.cpp.
size_t max_local_workgroup_size | ( | const cl::Kernel & | kernel | ) | const |
Find the maximum number of local work items in a workgroup that can be supported for the kernel.
Definition at line 114 of file CLKernelLibrary.cpp.
Referenced by ICLKernel::get_max_workgroup_size().
void set_context | ( | cl::Context | context | ) |
Sets the CL context used to create programs.
[in] | context | A CL context. |
Definition at line 77 of file CLKernelLibrary.cpp.
References arm_compute::test::validation::context.
Referenced by CLScheduler::set_context().
void set_device | ( | cl::Device | device | ) |
Sets the CL device for which the programs are created.
[in] | device | A CL device. |
Definition at line 73 of file CLKernelLibrary.cpp.
void set_kernel_path | ( | const std::string & | kernel_path | ) |
Sets the path that the kernels reside in.
[in] | kernel_path | Path of the directory in which the kernels reside. |
Definition at line 61 of file CLKernelLibrary.cpp.