21.02
|
Provides global access to a CL context and command queue. More...
#include <CLScheduler.h>
Public Member Functions | |
CLScheduler () | |
Constructor. More... | |
CLScheduler (const CLScheduler &)=delete | |
Prevent instances of this class from being copied (as this class contains pointers) More... | |
CLScheduler & | operator= (const CLScheduler &)=delete |
Prevent instances of this class from being copied (as this class contains pointers) More... | |
~CLScheduler ()=default | |
Default destructor. More... | |
void | default_init (ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr) |
Initialises the context and command queue used by the scheduler to default values and sets a default device and kernel path for the CLKernelLibrary. More... | |
void | default_init_with_context (cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr) |
Initialises the scheduler with context and device provided by the user. More... | |
void | enqueue (ICLKernel &kernel, bool flush=true) |
Schedule the execution of the passed kernel if possible. More... | |
void | enqueue_op (ICLKernel &kernel, ITensorPack &tensors, bool flush=true) |
Schedule the execution of the passed kernel if possible. More... | |
void | init (cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr) |
Initialises the context and command queue to be used by the scheduler. More... | |
cl::Context & | context () |
Accessor for the associated CL context. More... | |
cl::CommandQueue & | queue () |
Accessor for the associated CL command queue. More... | |
GPUTarget | target () const |
Get the target GPU. More... | |
CLGEMMHeuristicsHandle * | gemm_heuristics () const |
Accessor for the associated CLGEMMHeuristicsHandle. More... | |
void | set_context (cl::Context context) |
Accessor to set the CL context to be used by the scheduler. More... | |
void | set_queue (cl::CommandQueue queue) |
Accessor to set the CL command queue to be used by the scheduler. More... | |
void | set_target (GPUTarget target) |
Accessor to set target GPU to be used by the scheduler. More... | |
void | set_tuner (ICLTuner *tuner) |
Accessor to set the CL tuner to be used by the scheduler. More... | |
void | sync () |
Blocks until all commands in the associated command queue have finished. More... | |
cl::Event | enqueue_sync_event () |
Enqueues a marker into the associated command queue and returns the event. More... | |
void | tune_kernel_static (ICLKernel &kernel) |
Tunes OpenCL kernel. More... | |
bool | is_initialised () const |
Static Public Member Functions | |
static CLScheduler & | get () |
Access the scheduler singleton. More... | |
Provides global access to a CL context and command queue.
Definition at line 41 of file CLScheduler.h.
CLScheduler | ( | ) |
Constructor.
Definition at line 99 of file CLScheduler.cpp.
|
delete |
Prevent instances of this class from being copied (as this class contains pointers)
|
default |
Default destructor.
cl::Context & context | ( | ) |
Accessor for the associated CL context.
Definition at line 34 of file CLScheduler.cpp.
References ARM_COMPUTE_ERROR_ON, CLKernelLibrary::context(), and CLKernelLibrary::get().
Referenced by CLBufferMemoryRegion::CLBufferMemoryRegion(), CLTensorAllocator::import_memory(), arm_compute::restore_program_cache_from_file(), Framework::run(), and CLBufferMemoryRegion::unmap().
void default_init | ( | ICLTuner * | cl_tuner = nullptr , |
CLGEMMHeuristicsHandle * | gemm_h = nullptr |
||
) |
Initialises the context and command queue used by the scheduler to default values and sets a default device and kernel path for the CLKernelLibrary.
[in] | cl_tuner | (Optional) Pointer to ICLTuner (default=nullptr) |
[in] | gemm_h | (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr) |
Definition at line 124 of file CLScheduler.cpp.
References ARM_COMPUTE_ERROR_ON_MSG, arm_compute::create_opencl_context_and_device(), TunerFactory::create_tuner(), CLKernelLibrary::get(), CLKernelLibrary::init(), CLScheduler::init(), and CLScheduler::queue().
Referenced by CLDeviceBackend::initialize_backend(), arm_compute::restore_program_cache_from_file(), and arm_compute::test::validation::TEST_CASE().
void default_init_with_context | ( | cl::Device & | device, |
cl::Context & | ctx, | ||
ICLTuner * | cl_tuner = nullptr , |
||
CLGEMMHeuristicsHandle * | gemm_h = nullptr |
||
) |
Initialises the scheduler with context and device provided by the user.
[in] | device | OpenCL device to be used |
[in] | ctx | OpenCL context to be used |
[in] | cl_tuner | (Optional) Pointer to ICLTuner (default=nullptr) |
[in] | gemm_h | (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr) |
Definition at line 111 of file CLScheduler.cpp.
References TunerFactory::create_tuner(), CLKernelLibrary::get(), CLKernelLibrary::init(), CLScheduler::init(), and CLScheduler::queue().
Referenced by main(), and arm_compute::utils::run_example().
void enqueue | ( | ICLKernel & | kernel, |
bool | flush = true |
||
) |
Schedule the execution of the passed kernel if possible.
[in] | kernel | Kernel to execute. |
[in] | flush | (Optional) Specifies if the command queue will be flushed after running the kernel. |
Definition at line 183 of file CLScheduler.cpp.
Referenced by CLMeanStdDev::configure(), CLGEMMLowpMatrixMultiplyCore::prepare(), CLWinogradConvolutionLayer::prepare(), CLGEMM::prepare(), CLQLSTMLayer::prepare(), CLGEMMReshapeRHSMatrixKernelManaged::run(), CLHistogram::run(), CLIntegralImage::run(), CLEqualizeHistogram::run(), CLSpaceToDepthLayer::run(), CLHOGGradient::run(), CLHOGDescriptor::run(), CLDeconvolutionLayerUpsample::run(), CLFFT1D::run(), CLScale::run(), CLGaussian5x5::run(), CLSobel5x5::run(), CLSobel7x7::run(), CLFastCorners::run(), CLHOGDetector::run(), CLCannyEdge::run(), CLStackLayer::run(), CLArgMinMaxLayer::run(), CLL2NormalizeLayer::run(), CLMinMaxLocation::run(), CLNormalizationLayer::run(), CLReductionOperation::run(), CLConvolutionLayerReshapeWeights::run(), CLPadLayer::run(), CLMaxUnpoolingLayer::run(), CLDirectConvolutionLayer::run(), CLSoftmaxLayerGeneric< IS_LOG >::run(), CLHarrisCorners::run(), CLBatchToSpaceLayer::run(), CLGaussianPyramidHalf::run(), CLGEMMLowpMatrixMultiplyCore::run(), CLFuseBatchNormalization::run(), CLHOGMultiDetection::run(), CLBatchNormalizationLayer::run(), CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::run(), CLSpaceToBatchLayer::run(), CLOpticalFlow::run(), CLWinogradConvolutionLayer::run(), CLGEMMDeconvolutionLayer::run(), CLConvolutionSquare< matrix_size >::run(), CLGenerateProposalsLayer::run(), CLGaussianPyramidOrb::run(), CLGEMM::run(), CLLSTMLayer::run(), CLQLSTMLayer::run(), CLGEMMConvolutionLayer::run(), and arm_compute::schedule_kernel_on_ctx().
void enqueue_op | ( | ICLKernel & | kernel, |
ITensorPack & | tensors, | ||
bool | flush = true |
||
) |
Schedule the execution of the passed kernel if possible.
[in] | kernel | Kernel to execute. |
[in] | tensors | Vector containing the tensors to operate on. |
[in] | flush | (Optional) Specifies if the command queue will be flushed after running the kernel. |
Definition at line 189 of file CLScheduler.cpp.
Referenced by ICLOperator::run(), ClPooling::run(), ClConcatenate::run(), CLPixelWiseMultiplication::run(), and CLComplexPixelWiseMultiplication::run().
cl::Event enqueue_sync_event | ( | ) |
Enqueues a marker into the associated command queue and returns the event.
Definition at line 77 of file CLScheduler.cpp.
CLGEMMHeuristicsHandle * gemm_heuristics | ( | ) | const |
Accessor for the associated CLGEMMHeuristicsHandle.
Definition at line 52 of file CLScheduler.cpp.
Referenced by arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_native(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_reshaped(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_kernel(), and CLDeviceBackend::setup_backend_context().
|
static |
Access the scheduler singleton.
This method has been deprecated and will be removed in a future release.
Definition at line 104 of file CLScheduler.cpp.
References arm_compute::opencl_is_available().
Referenced by CLTuner::add_tuning_params(), CLBufferAllocator::allocate(), CLTensorAllocator::allocate(), CLBufferMemoryRegion::CLBufferMemoryRegion(), CLLut::clear(), ClPooling::configure(), CLPriorBoxLayer::configure(), CLRange::configure(), CLMeanStdDev::configure(), CLFFT1D::configure(), CLScale::configure(), CLDirectConvolutionLayer::configure(), CLDepthwiseConvolutionLayer::configure(), CLHOGDetector::configure(), CLFastCorners::configure(), CLMinMaxLocation::configure(), CLSoftmaxLayerGeneric< IS_LOG >::configure(), CLCropResize::configure(), CLGEMMLowpMatrixMultiplyCore::configure(), CLHOGMultiDetection::configure(), CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::configure(), CLConvolutionLayer::configure(), CLGEMM::configure(), CLGEMMConvolutionLayer::configure(), arm_compute::test::validation::DATA_TEST_CASE(), CLTensorAllocator::import_memory(), CLHOG::init(), CLDeviceBackend::initialize_backend(), main(), CLHOG::map(), CLDistribution1D::map(), CLLut::map(), CLArray< cl_int >::map(), CLSubTensor::map(), CLTensor::map(), OpenCLClock< output_timestamps >::OpenCLClock(), CLGEMMLowpMatrixMultiplyCore::prepare(), CLWinogradConvolutionLayer::prepare(), CLFFTConvolutionLayer::prepare(), CLGEMM::prepare(), CLFullyConnectedLayer::prepare(), CLQLSTMLayer::prepare(), CLGEMMConvolutionLayer::prepare(), arm_compute::restore_program_cache_from_file(), CLSplit::run(), ICLOperator::run(), CLGEMMReshapeRHSMatrixKernelManaged::run(), ClPooling::run(), CLHistogram::run(), CLIntegralImage::run(), CLEqualizeHistogram::run(), ClConcatenate::run(), CLHOGGradient::run(), CLSpaceToDepthLayer::run(), CLHOGDescriptor::run(), CLDeconvolutionLayerUpsample::run(), CLFFT1D::run(), CLScale::run(), CLGaussian5x5::run(), CLSobel7x7::run(), CLSobel5x5::run(), CLFastCorners::run(), CLHOGDetector::run(), CLCannyEdge::run(), CLStackLayer::run(), CLArgMinMaxLayer::run(), CLL2NormalizeLayer::run(), CLMinMaxLocation::run(), CLNormalizationLayer::run(), 
CLReductionOperation::run(), CLPadLayer::run(), CLConvolutionLayerReshapeWeights::run(), CLMaxUnpoolingLayer::run(), CLDirectConvolutionLayer::run(), CLSoftmaxLayerGeneric< IS_LOG >::run(), CLPixelWiseMultiplication::run(), CLBatchToSpaceLayer::run(), CLHarrisCorners::run(), CLGaussianPyramidHalf::run(), CLGEMMLowpMatrixMultiplyCore::run(), CLFuseBatchNormalization::run(), CLHOGMultiDetection::run(), CLBatchNormalizationLayer::run(), CLCropResize::run(), CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::run(), CLSpaceToBatchLayer::run(), CLWinogradConvolutionLayer::run(), CLOpticalFlow::run(), CLGEMMDeconvolutionLayer::run(), CLConvolutionSquare< matrix_size >::run(), CLGenerateProposalsLayer::run(), CLGaussianPyramidOrb::run(), CLComplexPixelWiseMultiplication::run(), CLGEMM::run(), CLLSTMLayer::run(), CLQLSTMLayer::run(), Framework::run(), CLGEMMConvolutionLayer::run(), arm_compute::utils::run_example(), arm_compute::save_program_cache_to_file(), arm_compute::schedule_kernel_on_ctx(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_native(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_reshaped(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_kernel(), CLTensorAllocator::set_associated_memory_group(), CLDeviceBackend::setup_backend_context(), arm_compute::test::sync_if_necessary(), arm_compute::test::validation::TEST_CASE(), OpenCLClock< output_timestamps >::test_measurements(), CLHOG::unmap(), CLDistribution1D::unmap(), CLLut::unmap(), CLLutAllocator::unmap(), CLArray< cl_int >::unmap(), CLSubTensor::unmap(), CLTensor::unmap(), CLBufferMemoryRegion::unmap(), CLDirectConvolutionLayer::validate(), CLDepthwiseConvolutionLayer::validate(), CLGEMMLowpMatrixMultiplyCore::validate(), CLGenerateProposalsLayer::validate(), CLConvolutionLayer::validate(), and CLGEMM::validate().
void init | ( | cl::Context | context, |
cl::CommandQueue | queue, | ||
const cl::Device & | device, | ||
ICLTuner * | cl_tuner = nullptr , |
||
CLGEMMHeuristicsHandle * | gemm_h = nullptr |
||
) |
Initialises the context and command queue to be used by the scheduler.
[in] | context | A CL context. |
[in] | queue | A CL command queue. |
[in] | device | A CL device. |
[in] | cl_tuner | (Optional) Pointer to OpenCL tuner (default=nullptr). Note: It is the caller's responsibility to release the memory allocated for the CLTuner. |
[in] | gemm_h | (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr) |
Definition at line 150 of file CLScheduler.cpp.
References ARM_COMPUTE_ERROR_ON_MSG, ITensorPack::empty(), arm_compute::get_target_from_device(), ICLKernel::run(), ICLKernel::run_op(), CLScheduler::set_context(), ICLTuner::tune_kernel_dynamic(), and IKernel::window().
Referenced by CLScheduler::default_init(), and CLScheduler::default_init_with_context().
bool is_initialised | ( | ) | const |
Definition at line 92 of file CLScheduler.cpp.
Referenced by arm_compute::restore_program_cache_from_file().
|
delete |
Prevent instances of this class from being copied (as this class contains pointers)
cl::CommandQueue & queue | ( | ) |
Accessor for the associated CL command queue.
Definition at line 41 of file CLScheduler.cpp.
References ARM_COMPUTE_ERROR_ON.
Referenced by CLTuner::add_tuning_params(), CLLut::clear(), arm_compute::test::validation::DATA_TEST_CASE(), CLScheduler::default_init(), CLScheduler::default_init_with_context(), CLTensor::map(), OpenCLClock< output_timestamps >::OpenCLClock(), CLGEMMLowpMatrixMultiplyCore::prepare(), CLWinogradConvolutionLayer::prepare(), CLFFTConvolutionLayer::prepare(), CLGEMM::prepare(), CLFullyConnectedLayer::prepare(), CLQLSTMLayer::prepare(), CLGEMMConvolutionLayer::prepare(), CLSplit::run(), CLFastCorners::run(), CLHOGDetector::run(), CLMinMaxLocation::run(), Framework::run(), CLTensorAllocator::set_associated_memory_group(), arm_compute::test::validation::TEST_CASE(), OpenCLClock< output_timestamps >::test_measurements(), CLLutAllocator::unmap(), and CLTensor::unmap().
void set_context | ( | cl::Context | context | ) |
Accessor to set the CL context to be used by the scheduler.
[in] | context | A CL context. |
Definition at line 144 of file CLScheduler.cpp.
References CLKernelLibrary::get(), and CLKernelLibrary::set_context().
Referenced by CLScheduler::init(), and Framework::run().
void set_queue | ( | cl::CommandQueue | queue | ) |
Accessor to set the CL command queue to be used by the scheduler.
[in] | queue | A CL command queue. |
Definition at line 57 of file CLScheduler.cpp.
Referenced by OpenCLClock< output_timestamps >::OpenCLClock(), and Framework::run().
void set_target | ( | GPUTarget | target | ) |
Accessor to set target GPU to be used by the scheduler.
[in] | target | The target GPU. |
Definition at line 62 of file CLScheduler.cpp.
References CLScheduler::target().
void set_tuner | ( | ICLTuner * | tuner | ) |
Accessor to set the CL tuner to be used by the scheduler.
[in] | tuner | A CL tuner |
Definition at line 67 of file CLScheduler.cpp.
void sync | ( | ) |
Blocks until all commands in the associated command queue have finished.
Definition at line 72 of file CLScheduler.cpp.
Referenced by CLCropResize::configure(), main(), CLCropResize::run(), arm_compute::test::sync_if_necessary(), and arm_compute::test::validation::TEST_CASE().
GPUTarget target | ( | ) | const |
Get the target GPU.
Definition at line 47 of file CLScheduler.cpp.
Referenced by CLDepthwiseConvolutionLayer::configure(), CLGEMMLowpMatrixMultiplyCore::configure(), CLConvolutionLayer::configure(), CLGEMM::configure(), CLScheduler::set_target(), CLDepthwiseConvolutionLayer::validate(), CLGEMMLowpMatrixMultiplyCore::validate(), CLConvolutionLayer::validate(), and CLGEMM::validate().
void tune_kernel_static | ( | ICLKernel & | kernel | ) |
Tunes OpenCL kernel.
[in] | kernel | Kernel to tune |
Definition at line 84 of file CLScheduler.cpp.
References ICLTuner::tune_kernel_static().
Referenced by ClPooling::configure(), CLRange::configure(), CLScale::configure(), CLDirectConvolutionLayer::configure(), and CLGEMMConvolutionLayer::configure().