Compute Library
 21.05
CLKernelLibrary Class Reference (final)

CLKernelLibrary class. More...

#include <CLKernelLibrary.h>

Public Member Functions

void init (std::string kernel_path, cl::Context context, cl::Device device)
 Initialises the kernel library. More...
 
void set_kernel_path (const std::string &kernel_path)
 Sets the path that the kernels reside in. More...
 
std::string get_kernel_path ()
 Gets the path that the kernels reside in. More...
 
std::pair< std::string, bool > get_program (const std::string &program_name) const
 Gets the source of the selected program. More...
 
cl::Context & context ()
 Accessor for the associated CL context. More...
 
const cl::Device & get_device ()
 Gets the CL device for which the programs are created. More...
 
void set_device (cl::Device device)
 Sets the CL device for which the programs are created. More...
 
std::string get_device_version ()
 Return the device version. More...
 
cl_uint get_num_compute_units ()
 Return the maximum number of compute units in the device. More...
 
Kernel create_kernel (const std::string &kernel_name, const std::set< std::string > &build_options_set={}) const
 Creates a kernel from the kernel library. More...
 
size_t max_local_workgroup_size (const cl::Kernel &kernel) const
 Find the maximum number of local work items in a workgroup that can be supported for the kernel. More...
 
cl::NDRange default_ndrange () const
 Return the default NDRange for the device. More...
 
void clear_programs_cache ()
 Clear the library's cache of binary programs. More...
 
const std::map< std::string, cl::Program > & get_built_programs () const
 Access the cache of built OpenCL programs. More...
 
void add_built_program (const std::string &built_program_name, const cl::Program &program)
 Add a new built program to the cache. More...
 
bool fp16_supported () const
 Returns true if FP16 is supported by the CL device. More...
 
bool int64_base_atomics_supported () const
 Returns true if int64_base_atomics extension is supported by the CL device. More...
 
std::string get_program_name (const std::string &kernel_name) const
 Returns the program name given a kernel name. More...
 
bool is_wbsm_supported ()
 
void set_context (cl::Context context)
 Sets the CL context used to create programs. More...
 
CLCompileContext & get_compile_context ()
 Gets the compile context used. More...
 

Static Public Member Functions

static CLKernelLibrary & get ()
 Access the KernelLibrary singleton. More...
 

Detailed Description

CLKernelLibrary class.

Definition at line 38 of file CLKernelLibrary.h.

Member Function Documentation

◆ add_built_program()

void add_built_program ( const std::string &  built_program_name,
const cl::Program &  program 
)

Add a new built program to the cache.

Parameters
[in] built_program_name: Name of the program
[in] program: Built program to add to the cache

Definition at line 940 of file CLKernelLibrary.cpp.

941 {
942  _compile_context.add_built_program(built_program_name, program);
943 }
void add_built_program(const std::string &built_program_name, const cl::Program &program) const
Add a new built program to the cache.

References CLCompileContext::add_built_program().

Referenced by arm_compute::restore_program_cache_from_file().

◆ clear_programs_cache()

void clear_programs_cache ( )

Clear the library's cache of binary programs.

Definition at line 930 of file CLKernelLibrary.cpp.

931 {
932  _compile_context.clear_programs_cache();
933 }
void clear_programs_cache()
Clear the library's cache of binary programs.

References CLCompileContext::clear_programs_cache().

Referenced by Framework::run().

◆ context()

cl::Context & context ( )

Accessor for the associated CL context.

Returns
A CL context.

Definition at line 905 of file CLKernelLibrary.cpp.

906 {
907  return _compile_context.context();
908 }
cl::Context & context()
Accessor for the associated CL context.

References CLCompileContext::context().

Referenced by ClContext::ClContext(), CLScheduler::context(), CLKernelLibrary::init(), and CLKernelLibrary::set_context().

◆ create_kernel()

Kernel create_kernel ( const std::string &  kernel_name,
const std::set< std::string > &  build_options_set = {} 
) const

Creates a kernel from the kernel library.

Parameters
[in] kernel_name: Kernel name.
[in] build_options_set: Kernel build options as a set.
Returns
The created kernel.

Definition at line 870 of file CLKernelLibrary.cpp.

871 {
872  const std::string program_name = get_program_name(kernel_name);
873  auto program = get_program(program_name);
874 
875  return _compile_context.create_kernel(kernel_name, program_name, program.first, _kernel_path, build_options_set, program.second);
876 }
std::pair< std::string, bool > get_program(const std::string &program_name) const
Gets the source of the selected program.
std::string kernel_name
Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
Creates an OpenCL kernel.
std::string get_program_name(const std::string &kernel_name) const
Returns the program name given a kernel name.

References CLCompileContext::create_kernel(), CLKernelLibrary::get_program(), CLKernelLibrary::get_program_name(), and kernel_name.

Referenced by arm_compute::create_opencl_kernel().

◆ default_ndrange()

cl::NDRange default_ndrange ( ) const

Return the default NDRange for the device.

Definition at line 1016 of file CLKernelLibrary.cpp.

1017 {
1018  return _compile_context.default_ndrange();
1019 }
cl::NDRange default_ndrange() const
Return the default NDRange for the device.

References CLCompileContext::default_ndrange().

Referenced by CLArgMinMaxLayerKernel::configure().

◆ fp16_supported()

bool fp16_supported ( ) const

Returns true if FP16 is supported by the CL device.

Returns
true if the CL device supports FP16

Definition at line 945 of file CLKernelLibrary.cpp.

946 {
947  return _compile_context.fp16_supported();
948 }
bool fp16_supported() const
Returns true if FP16 is supported by the CL device.

References CLCompileContext::fp16_supported().

◆ get()

CLKernelLibrary & get ( )
static

Access the KernelLibrary singleton.

This method has been deprecated and will be removed in future releases.

Returns
The KernelLibrary instance.

Definition at line 864 of file CLKernelLibrary.cpp.

865 {
866  static CLKernelLibrary _kernel_library;
867  return _kernel_library;
868 }
CLKernelLibrary class.

Referenced by ClContext::ClContext(), CLRuntimeContext::CLRuntimeContext(), ClFillKernel::configure(), CLMinMaxLayerKernel::configure(), CLNormalizePlanarYUVLayer::configure(), CLBitwiseNot::configure(), CLReverse::configure(), CLTile::configure(), CLFillBorder::configure(), ClCropKernel::configure(), CLChannelShuffleLayerKernel::configure(), CLDepthToSpaceLayer::configure(), CLReverseKernel::configure(), CLGather::configure(), CLSpaceToDepthLayerKernel::configure(), CLDepthToSpaceLayerKernel::configure(), CLBitwiseAnd::configure(), CLSelect::configure(), CLBitwiseOr::configure(), CLComputeAllAnchorsKernel::configure(), CLBitwiseXor::configure(), CLBatchToSpaceLayerKernel::configure(), CLNormalizationLayerKernel::configure(), CLTileKernel::configure(), CLSpaceToBatchLayerKernel::configure(), CLDeconvolutionLayerUpsampleKernel::configure(), CLQLSTMLayerNormalizationKernel::configure(), CLRemapKernel::configure(), CLGatherKernel::configure(), CLReorgLayerKernel::configure(), CLComparisonKernel::configure(), CLMeanStdDevNormalizationLayer::configure(), CLDepthConvertLayerKernel::configure(), CLReorgLayer::configure(), CLFFTScaleKernel::configure(), CLComparison::configure(), CLMeanStdDevNormalizationKernel::configure(), CLChannelShuffleLayer::configure(), CLFFTDigitReverseKernel::configure(), CLNormalizePlanarYUVLayerKernel::configure(), CLPadLayerKernel::configure(), CLFlattenLayer::configure(), CLPriorBoxLayer::configure(), CLPriorBoxLayerKernel::configure(), CLReductionOperationKernel::configure(), CLGEMMLowpMatrixMultiplyNativeKernel::configure(), CLL2NormalizeLayerKernel::configure(), CLRangeKernel::configure(), CLBoundingBoxTransformKernel::configure(), CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(), CLFFTRadixStageKernel::configure(), CLROIPoolingLayerKernel::configure(), CLStackLayerKernel::configure(), CLGEMMReshapeLHSMatrixKernel::configure(), CLWinogradInputTransformKernel::configure(), CLCast::configure(), CLROIAlignLayerKernel::configure(), 
CLGEMMMatrixMultiplyNativeKernel::configure(), CLRemap::configure(), CLUnstack::configure(), CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(), CLDepthConvertLayer::configure(), CLFuseBatchNormalizationKernel::configure(), CLWinogradFilterTransformKernel::configure(), CLReduceMean::configure(), CLReshapeLayer::configure(), CLBatchNormalizationLayerKernel::configure(), CLFill::configure(), CLGEMMLowpMatrixMultiplyReshapedKernel::configure(), CLTranspose::configure(), CLBoundingBoxTransform::configure(), CLRange::configure(), CLArgMinMaxLayerKernel::configure(), CLWinogradOutputTransformKernel::configure(), CLDepthwiseConvolutionLayerNativeKernel::configure(), CLCopy::configure(), CLFillBorderKernel::configure(), CLFloor::configure(), CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(), CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::configure(), CLGEMMMatrixMultiplyKernel::configure(), CLROIPoolingLayer::configure(), CLWinogradInputTransform::configure(), CLRsqrtLayer::configure(), CLDeconvolutionReshapeOutputKernel::configure(), CLSpaceToDepthLayer::configure(), CLPermute::configure(), CLROIAlignLayer::configure(), CLBatchToSpaceLayer::configure(), CLDeconvolutionLayer::configure(), CLGEMMLowpOffsetContributionKernel::configure(), CLGEMMLowpOffsetContributionOutputStageKernel::configure(), CLLogicalNot::configure(), CLRNNLayer::configure(), CLPReluLayer::configure(), CLDequantizationLayer::configure(), CLGEMMReshapeRHSMatrixKernel::configure(), CLCol2ImKernel::configure(), CLPoolingLayer::configure(), CLConvertFullyConnectedWeights::configure(), CLCrop::configure(), CLFFT2D::configure(), CLConvolutionLayerReshapeWeights::configure(), CLFullyConnectedLayerReshapeWeightsManaged::configure(), CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(), CLSpaceToBatchLayer::configure(), CLScale::configure(), CLDeconvolutionLayerUpsample::configure(), CLStridedSlice::configure(), CLStackLayer::configure(), CLFFT1D::configure(), CLQuantizationLayer::configure(), 
CLPadLayer::configure(), CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(), CLSoftmaxLayerGeneric< IS_LOG >::configure(), CLNormalizationLayer::configure(), CLWeightsReshapeKernel::configure(), CLMaxUnpoolingLayer::configure(), CLDirectConvolutionLayer::configure(), CLFuseBatchNormalization::configure(), CLGEMMReshapeRHSMatrixKernelManaged::configure(), CLActivationLayer::configure(), CLReductionOperation::configure(), CLL2NormalizeLayer::configure(), CLPixelWiseMultiplication::configure(), CLIm2ColKernel::configure(), CLArithmeticAddition::configure(), CLArgMinMaxLayer::configure(), CLCropResize::configure(), CLBatchNormalizationLayer::configure(), CLInstanceNormalizationLayer::configure(), CLConcatenateLayer::configure(), CLDepthwiseConvolutionLayer::configure(), CLGEMMMatrixMultiplyReshapedKernel::configure(), CLWinogradConvolutionLayer::configure(), CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(), CLGEMMLowpMatrixAReductionKernel::configure(), CLFFTConvolutionLayer::configure(), CLComparisonStatic< COP >::configure(), CLGEMMLowpMatrixMultiplyCore::configure(), CLLSTMLayerQuantized::configure(), CLLogicalAnd::configure(), CLLogicalOr::configure(), CLGenerateProposalsLayer::configure(), CLSlice::configure(), CLDirectDeconvolutionLayer::configure(), CLGEMMDeconvolutionLayer::configure(), CLConvolutionLayerReshapeWeightsTransform::configure(), CLConvolutionLayer::configure(), CLLSTMLayer::configure(), CLQLSTMLayer::configure(), CLExpLayer::configure(), CLConvertFullyConnectedWeightsManaged::configure(), CLFullyConnectedLayer::configure(), CLGEMMLowpMatrixBReductionKernel::configure(), CLComplexPixelWiseMultiplication::configure(), CLGEMM::configure(), CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::configure(), CLNegLayer::configure(), CLArithmeticSubtraction::configure(), CLGEMMConvolutionLayer::configure(), CLSinLayer::configure(), CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::configure(), CLLogLayer::configure(), 
CLArithmeticDivision::configure(), CLGEMMLowpOutputStage::configure(), CLAbsLayer::configure(), CLElementwiseMax::configure(), CLRoundLayer::configure(), CLElementwiseMin::configure(), CLElementwiseSquaredDiff::configure(), CLElementwisePower::configure(), CLScheduler::context(), arm_compute::create_kernel(), arm_compute::create_opencl_kernel(), CLScheduler::default_init(), CLScheduler::default_init_with_context(), arm_compute::error_on_unsupported_int64_base_atomics(), ICLKernel::get_max_workgroup_size(), CLRuntimeContext::kernel_library(), main(), arm_compute::restore_program_cache_from_file(), CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::run(), Framework::run(), ClDirectConvolutionKernel::run_op(), arm_compute::save_program_cache_to_file(), CLScheduler::set_context(), arm_compute::test::validation::TEST_CASE(), OpenCLMemoryUsage::test_measurements(), CLTuner::tune_kernel_dynamic(), examples::gemm_tuner_helpers::update_padding_for_cl_image(), arm_compute::cl_gemm::update_padding_for_cl_image(), and arm_compute::cl_gemm::validate_image2d_support_on_rhs().

◆ get_built_programs()

const std::map< std::string, cl::Program > & get_built_programs ( ) const

Access the cache of built OpenCL programs.

Definition at line 935 of file CLKernelLibrary.cpp.

936 {
937  return _compile_context.get_built_programs();
938 }
const std::map< std::string, cl::Program > & get_built_programs() const
Access the cache of built OpenCL programs.

References CLCompileContext::get_built_programs().

Referenced by arm_compute::save_program_cache_to_file(), and OpenCLMemoryUsage::test_measurements().

◆ get_compile_context()

CLCompileContext & get_compile_context ( )

Gets the compile context used.

Returns
The used compile context

Definition at line 1031 of file CLKernelLibrary.cpp.

1032 {
1033  return _compile_context;
1034 }

◆ get_device()

const cl::Device & get_device ( )

Gets the CL device for which the programs are created.

Definition at line 910 of file CLKernelLibrary.cpp.

911 {
912  return _compile_context.get_device();
913 }
const cl::Device & get_device() const
Gets the CL device for which the programs are created.

References CLCompileContext::get_device().

Referenced by ClContext::ClContext(), arm_compute::test::validation::TEST_CASE(), and arm_compute::cl_gemm::validate_image2d_support_on_rhs().

◆ get_device_version()

std::string get_device_version ( )

Return the device version.

Returns
The content of CL_DEVICE_VERSION

Definition at line 1021 of file CLKernelLibrary.cpp.

1022 {
1023  return _compile_context.get_device_version();
1024 }
std::string get_device_version() const
Return the device version.

References CLCompileContext::get_device_version().

◆ get_kernel_path()

std::string get_kernel_path ( )

Gets the path that the kernels reside in.

Definition at line 925 of file CLKernelLibrary.cpp.

926 {
927  return _kernel_path;
928 }

Referenced by arm_compute::create_kernel(), and arm_compute::test::validation::TEST_CASE().

◆ get_num_compute_units()

cl_uint get_num_compute_units ( )

Return the maximum number of compute units in the device.

Returns
The content of CL_DEVICE_MAX_COMPUTE_UNITS

Definition at line 1026 of file CLKernelLibrary.cpp.

1027 {
1028  return _compile_context.get_num_compute_units();
1029 }
cl_uint get_num_compute_units() const
Return the maximum number of compute units in the device.

References CLCompileContext::get_num_compute_units().

Referenced by CLTuner::tune_kernel_dynamic().

◆ get_program()

std::pair< std::string, bool > get_program ( const std::string &  program_name) const

Gets the source of the selected program.

Parameters
[in] program_name: Program name.
Returns
A pair holding the contents of the selected program and a flag that is false when the contents are source code and true when they are a binary.

Definition at line 960 of file CLKernelLibrary.cpp.

961 {
962 #ifdef EMBEDDED_KERNELS
963 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS
964  const auto inflatted_program_source_it = _decompressed_source_map.find(program_name);
965  if(inflatted_program_source_it != _decompressed_source_map.end())
966  {
967  return std::make_pair(inflatted_program_source_it->second, false);
968  }
969 #endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
970 
971  const auto program_source_it = _program_source_map.find(program_name);
972  if(program_source_it == _program_source_map.end())
973  {
974  ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
975  }
976  std::string program_source = program_source_it->second;
977 
978 #ifdef ARM_COMPUTE_COMPRESSED_KERNELS
979  std::string decompressed_program_source = decompress_zlib(decode_base64(program_source_it->second));
980  ARM_COMPUTE_ERROR_ON_MSG(decompressed_program_source.empty(), "Cannot de-compress requested program");
981  _decompressed_source_map.insert(std::make_pair(program_name, decompressed_program_source));
982  program_source = std::move(decompressed_program_source);
983 #endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
984 
985  return std::make_pair(program_source, false);
986 #else /* EMBEDDED_KERNELS */
987  // Check for binary
988  std::string source_name = _kernel_path + program_name;
989  std::string binary_name = source_name + "bin";
990  std::string program_source{};
991  bool is_binary = false;
992 
993  if(std::ifstream(binary_name).is_open())
994  {
995  program_source = read_file(binary_name, true);
996  is_binary = true;
997  }
998  else if(std::ifstream(source_name).is_open())
999  {
1000  program_source = read_file(source_name, false);
1001  }
1002  else
1003  {
1004  ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
1005  }
1006 
1007  return std::make_pair(program_source, is_binary);
1008 #endif /* EMBEDDED_KERNELS */
1009 }
#define ARM_COMPUTE_ERROR_VAR(msg,...)
Print the given message then throw an std::runtime_error.
Definition: Error.h:346
std::string read_file(const std::string &filename, bool binary)
Load an entire file in memory.
Definition: Utils.cpp:38
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
Definition: Error.h:456

References ARM_COMPUTE_ERROR_ON_MSG, ARM_COMPUTE_ERROR_VAR, and arm_compute::read_file().

Referenced by CLKernelLibrary::create_kernel(), arm_compute::create_kernel(), and arm_compute::test::validation::TEST_CASE().

◆ get_program_name()

std::string get_program_name ( const std::string &  kernel_name) const

Returns the program name given a kernel name.

Returns
Program name

Definition at line 878 of file CLKernelLibrary.cpp.

879 {
880  // Find which program contains the kernel
881  auto kernel_program_it = _kernel_program_map.find(kernel_name);
882 
883  if(_kernel_program_map.end() == kernel_program_it)
884  {
885  ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
886  }
887 
888  const std::string program_name = kernel_program_it->second;
889 
890  return program_name;
891 }
#define ARM_COMPUTE_ERROR_VAR(msg,...)
Print the given message then throw an std::runtime_error.
Definition: Error.h:346
std::string kernel_name

References ARM_COMPUTE_ERROR_VAR, and kernel_name.

Referenced by CLKernelLibrary::create_kernel(), arm_compute::create_kernel(), and arm_compute::test::validation::TEST_CASE().

◆ init()

void init ( std::string  kernel_path,
cl::Context  context,
cl::Device  device 
)

Initialises the kernel library.

Parameters
[in] kernel_path: Path of the directory from which kernel sources are loaded.
[in] context: CL context used to create programs.
[in] device: CL device for which the programs are created.

Definition at line 893 of file CLKernelLibrary.cpp.

894 {
895  _compile_context = CLCompileContext(context, device);
896  _kernel_path = kernel_path + "/";
897 }
CLCompileContext class.
cl::Context & context()
Accessor for the associated CL context.

References CLKernelLibrary::context().

Referenced by CLRuntimeContext::CLRuntimeContext(), CLScheduler::default_init(), and CLScheduler::default_init_with_context().

◆ int64_base_atomics_supported()

bool int64_base_atomics_supported ( ) const

Returns true if int64_base_atomics extension is supported by the CL device.

Returns
true if the CL device supports int64_base_atomics extension

Definition at line 950 of file CLKernelLibrary.cpp.

951 {
952  return _compile_context.int64_base_atomics_supported();
953 }
bool int64_base_atomics_supported() const
Returns true if int64_base_atomics extension is supported by the CL device.

References CLCompileContext::int64_base_atomics_supported().

◆ is_wbsm_supported()

bool is_wbsm_supported ( )

Definition at line 955 of file CLKernelLibrary.cpp.

956 {
957  return _compile_context.is_wbsm_supported();
958 }

References CLCompileContext::is_wbsm_supported().

◆ max_local_workgroup_size()

size_t max_local_workgroup_size ( const cl::Kernel &  kernel) const

Find the maximum number of local work items in a workgroup that can be supported for the kernel.

Definition at line 1011 of file CLKernelLibrary.cpp.

1012 {
1013  return _compile_context.max_local_workgroup_size(kernel);
1014 }
size_t max_local_workgroup_size(const cl::Kernel &kernel) const
Find the maximum number of local work items in a workgroup that can be supported for the kernel.

References CLCompileContext::max_local_workgroup_size().

Referenced by ICLKernel::get_max_workgroup_size().

◆ set_context()

void set_context ( cl::Context  context)

Sets the CL context used to create programs.

Note
Setting the context also resets the device to the first one available in the new context.
Parameters
[in] context: A CL context.

Definition at line 920 of file CLKernelLibrary.cpp.

921 {
922  _compile_context.set_context(context);
923 }
void set_context(cl::Context context)
Sets the CL context used to create programs.
cl::Context & context()
Accessor for the associated CL context.

References CLKernelLibrary::context(), and CLCompileContext::set_context().

Referenced by CLScheduler::set_context().

◆ set_device()

void set_device ( cl::Device  device)

Sets the CL device for which the programs are created.

Parameters
[in] device: A CL device.

Definition at line 915 of file CLKernelLibrary.cpp.

916 {
917  _compile_context.set_device(device);
918 }
void set_device(cl::Device device)
Sets the CL device for which the programs are created.

References CLCompileContext::set_device().

◆ set_kernel_path()

void set_kernel_path ( const std::string &  kernel_path)

Sets the path that the kernels reside in.

Parameters
[in] kernel_path: Path of the kernel.

Definition at line 899 of file CLKernelLibrary.cpp.

900 {
901  _kernel_path = std::move(kernel_path);
902  _kernel_path += "/";
903 }

The documentation for this class was generated from the following files: