23.05
|
Provides global access to a CL context and command queue. More...
#include <CLScheduler.h>
Public Member Functions | |
CLScheduler () | |
Constructor. More... | |
CLScheduler (const CLScheduler &)=delete | |
Prevent instances of this class from being copied (as this class contains pointers). More... | |
CLScheduler & | operator= (const CLScheduler &)=delete |
Prevent instances of this class from being copied (as this class contains pointers). More... | |
~CLScheduler ()=default | |
Default destructor. More... | |
void | default_init (ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr, CLBackendType cl_backend_type=CLBackendType::Native) |
Initialises the context and command queue used by the scheduler to default values and sets a default device and kernel path for the CLKernelLibrary. More... | |
void | default_init_with_context (cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr) |
Initialises the scheduler with context and device provided by the user. More... | |
void | default_reinit (ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr, CLBackendType cl_backend_type=CLBackendType::Native) |
Re-initializes the context and command queue used by the scheduler to default values and sets a default device and kernel path for the CLKernelLibrary. More... | |
void | enqueue (ICLKernel &kernel, bool flush=true) |
Schedule the execution of the passed kernel if possible. More... | |
void | enqueue_op (ICLKernel &kernel, ITensorPack &tensors, bool flush=true) |
Schedule the execution of the passed kernel if possible. More... | |
void | init (cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr, CLBackendType cl_backend_type=CLBackendType::Native) |
Initialises the context and command queue to be used by the scheduler. More... | |
cl::Context & | context () |
Accessor for the associated CL context. More... | |
cl::CommandQueue & | queue () |
Accessor for the associated CL command queue. More... | |
GPUTarget | target () const |
Get the target GPU. More... | |
CLGEMMHeuristicsHandle * | gemm_heuristics () const |
Accessor for the associated CLGEMMHeuristicsHandle. More... | |
void | set_context (cl::Context context) |
Accessor to set the CL context to be used by the scheduler. More... | |
void | set_queue (cl::CommandQueue queue) |
Accessor to set the CL command queue to be used by the scheduler. More... | |
void | set_target (GPUTarget target) |
Accessor to set target GPU to be used by the scheduler. More... | |
void | set_tuner (ICLTuner *tuner) |
Accessor to set the CL tuner to be used by the scheduler. More... | |
void | sync () |
Blocks until all commands in the associated command queue have finished. More... | |
cl::Event | enqueue_sync_event () |
Enqueues a marker into the associated command queue and returns the event. More... | |
void | tune_kernel_static (ICLKernel &kernel) |
Tunes OpenCL kernel. More... | |
void | enable_job_chaining (int job_chaining_size) |
Enable job chaining. More... | |
bool | is_initialised () const |
Static Public Member Functions | |
static CLScheduler & | get () |
Access the scheduler singleton. More... | |
Provides global access to a CL context and command queue.
Definition at line 43 of file CLScheduler.h.
CLScheduler | ( | ) |
Constructor.
Definition at line 97 of file CLScheduler.cpp.
|
delete |
Prevent instances of this class from being copied (as this class contains pointers).
|
default |
Default destructor.
cl::Context & context | ( | ) |
Accessor for the associated CL context.
Definition at line 32 of file CLScheduler.cpp.
References ARM_COMPUTE_ERROR_ON, CLKernelLibrary::context(), and CLKernelLibrary::get().
Referenced by CLTensorAllocator::import_memory(), arm_compute::restore_program_cache_from_file(), and Framework::run().
void default_init | ( | ICLTuner * | cl_tuner = nullptr , |
CLGEMMHeuristicsHandle * | gemm_h = nullptr , |
||
CLBackendType | cl_backend_type = CLBackendType::Native |
||
) |
Initialises the context and command queue used by the scheduler to default values and sets a default device and kernel path for the CLKernelLibrary.
[in] | cl_tuner | (Optional) Pointer to ICLTuner (default=nullptr) |
[in] | gemm_h | (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr) |
[in] | cl_backend_type | (Optional) Type of backend to use (default = CLBackendType::Native) |
Definition at line 122 of file CLScheduler.cpp.
References ARM_COMPUTE_ERROR_ON_MSG, arm_compute::create_opencl_context_and_device(), CLKernelLibrary::get(), CLKernelLibrary::init(), CLScheduler::init(), and CLScheduler::queue().
Referenced by CLScheduler::default_reinit(), CLDeviceBackend::initialize_backend(), and arm_compute::restore_program_cache_from_file().
void default_init_with_context | ( | cl::Device & | device, |
cl::Context & | ctx, | ||
ICLTuner * | cl_tuner = nullptr , |
||
CLGEMMHeuristicsHandle * | gemm_h = nullptr |
||
) |
Initialises the scheduler with context and device provided by the user.
[in] | device | OpenCL device to be used |
[in] | ctx | OpenCL context to be used |
[in] | cl_tuner | (Optional) Pointer to ICLTuner (default=nullptr) |
[in] | gemm_h | (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr) |
Definition at line 110 of file CLScheduler.cpp.
References CLKernelLibrary::get(), CLKernelLibrary::init(), CLScheduler::init(), and CLScheduler::queue().
Referenced by main(), and arm_compute::utils::run_example().
void default_reinit | ( | ICLTuner * | cl_tuner = nullptr , |
CLGEMMHeuristicsHandle * | gemm_h = nullptr , |
||
CLBackendType | cl_backend_type = CLBackendType::Native |
||
) |
Re-initializes the context and command queue used by the scheduler to default values and sets a default device and kernel path for the CLKernelLibrary.
[in] | cl_tuner | (Optional) Pointer to ICLTuner (default=nullptr) |
[in] | gemm_h | (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr) |
[in] | cl_backend_type | (Optional) Type of backend to use (default = CLBackendType::Native) |
Definition at line 141 of file CLScheduler.cpp.
References CLScheduler::default_init().
Referenced by arm_compute::test::validation::TEST_CASE().
void enable_job_chaining | ( | int | job_chaining_size | ) |
Enable job chaining.
The command queue will only be flushed when job_chaining_size
kernels have been enqueued.
[in] | job_chaining_size | Kernels to enqueue before flushing |
Definition at line 216 of file CLScheduler.cpp.
void enqueue | ( | ICLKernel & | kernel, |
bool | flush = true |
||
) |
Schedule the execution of the passed kernel if possible.
[in] | kernel | Kernel to execute. |
[in] | flush | (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled. |
Definition at line 205 of file CLScheduler.cpp.
References arm_compute::test::validation::pack.
Referenced by CLSpaceToDepthLayer::run(), CLDeconvolutionLayerUpsample::run(), CLFFT1D::run(), CLStackLayer::run(), CLL2NormalizeLayer::run(), CLNormalizationLayer::run(), CLArgMinMaxLayer::run(), CLPadLayer::run(), CLReductionOperation::run(), CLDepthwiseConvolutionLayer::run(), CLMaxUnpoolingLayer::run(), CLFuseBatchNormalization::run(), CLBatchNormalizationLayer::run(), CLBatchToSpaceLayer::run(), CLSpaceToBatchLayer::run(), CLGEMMDeconvolutionLayer::run(), CLGenerateProposalsLayer::run(), CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::run(), CLQLSTMLayer::run(), ClSynthetizeOperatorWithBorder< K >::run(), and arm_compute::schedule_kernel_on_ctx().
void enqueue_op | ( | ICLKernel & | kernel, |
ITensorPack & | tensors, | ||
bool | flush = true |
||
) |
Schedule the execution of the passed kernel if possible.
[in] | kernel | Kernel to execute. |
[in] | tensors | Vector containing the tensors to operate on. |
[in] | flush | (Optional) Specifies if the command queue will be flushed after running the kernel. This will be ignored if job chaining is enabled. |
Definition at line 211 of file CLScheduler.cpp.
Referenced by ClWorkloadRuntime::configure(), ClGemm::prepare(), ClGemmLowpMatrixMultiplyCore::prepare(), ClWinogradConv2d::prepare(), ClGemmConv2d::prepare(), CLQLSTMLayer::prepare(), ClDequantize::run(), ClQuantize::run(), ICLOperator::run(), ClScale::run(), ClWorkloadRuntime::run(), ClSoftmax::run(), ClConcatenate::run(), ClDirectConv2d::run(), ClDirectConv3d::run(), ClMatMul::run(), ClTransposedConvolution::run(), ClGemmLowpOutputStage::run(), ClIndirectConv2d::run(), ClGemm::run(), ClGemmLowpMatrixMultiplyCore::run(), ClWinogradConv2d::run(), CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::run(), ClGemmConv2d::run(), CLLSTMLayer::run(), and ClSynthetizeOperatorWithBorder< K >::run().
cl::Event enqueue_sync_event | ( | ) |
Enqueues a marker into the associated command queue and returns the event.
Definition at line 75 of file CLScheduler.cpp.
CLGEMMHeuristicsHandle * gemm_heuristics | ( | ) | const |
Accessor for the associated CLGEMMHeuristicsHandle.
Definition at line 50 of file CLScheduler.cpp.
Referenced by arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_native(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_reshaped(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_kernel(), and CLDeviceBackend::setup_backend_context().
|
static |
Access the scheduler singleton.
This method has been deprecated and will be removed in future releases.
Definition at line 103 of file CLScheduler.cpp.
References arm_compute::opencl_is_available().
Referenced by CLTuner::add_tuning_params(), CLBufferAllocator::allocate(), ClQueue::cl_queue(), CLBufferMemoryRegion::CLBufferMemoryRegion(), ClScale::configure(), ClPool2d::configure(), ClPool3d::configure(), ClWorkloadRuntime::configure(), ClSoftmax::configure(), ClDirectConv2d::configure(), CLPriorBoxLayer::configure(), ClTransposedConvolution::configure(), ClMatMul::configure(), CLRange::configure(), ClIndirectConv2d::configure(), CLFFT1D::configure(), CLDepthwiseConvolutionLayer::configure(), ClGemm::configure(), ClGemmLowpMatrixMultiplyCore::configure(), CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::configure(), ClWinogradConv2d::configure(), CLCropResize::configure(), ClGemmConv2d::configure(), ClConv2d::configure(), CLConvolutionLayer::configure(), CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::configure(), arm_compute::test::validation::DATA_TEST_CASE(), ClQueue::finish(), CLTensorAllocator::import_memory(), CLDeviceBackend::initialize_backend(), main(), CLArray< cl_int >::map(), CLSubTensor::map(), CLTensor::map(), OpenCLClock< output_timestamps >::OpenCLClock(), ClGemm::prepare(), ClGemmLowpMatrixMultiplyCore::prepare(), ClWinogradConv2d::prepare(), ClGemmConv2d::prepare(), CLFFTConvolutionLayer::prepare(), CLQLSTMLayer::prepare(), arm_compute::restore_program_cache_from_file(), ClDequantize::run(), ClQuantize::run(), CLSplit::run(), ICLOperator::run(), ClScale::run(), ClWorkloadRuntime::run(), ClSoftmax::run(), ClConcatenate::run(), ClDirectConv2d::run(), ClDirectConv3d::run(), ClMatMul::run(), ClTransposedConvolution::run(), ClGemmLowpOutputStage::run(), CLSpaceToDepthLayer::run(), ClIndirectConv2d::run(), CLDeconvolutionLayerUpsample::run(), CLFFT1D::run(), ClGemm::run(), CLStackLayer::run(), ClGemmLowpMatrixMultiplyCore::run(), CLL2NormalizeLayer::run(), CLNormalizationLayer::run(), CLArgMinMaxLayer::run(), CLPadLayer::run(), CLReductionOperation::run(), 
CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::run(), ClWinogradConv2d::run(), CLDepthwiseConvolutionLayer::run(), CLMaxUnpoolingLayer::run(), ClGemmConv2d::run(), CLFuseBatchNormalization::run(), CLCropResize::run(), CLBatchNormalizationLayer::run(), CLBatchToSpaceLayer::run(), CLSpaceToBatchLayer::run(), CLGEMMDeconvolutionLayer::run(), CLGenerateProposalsLayer::run(), CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder< K, bordersize >::run(), CLLSTMLayer::run(), CLQLSTMLayer::run(), ClSynthetizeOperatorWithBorder< K >::run(), Framework::run(), arm_compute::utils::run_example(), arm_compute::save_program_cache_to_file(), arm_compute::schedule_kernel_on_ctx(), ClQueue::scheduler(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_native(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_reshaped(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(), arm_compute::cl_gemm::auto_heuristics::select_mlgo_gemm_kernel(), ClContext::set_cl_ctx(), ClQueue::set_cl_queue(), CLTensorAllocator::set_global_allocator(), CLDeviceBackend::setup_backend_context(), CLDeviceBackend::sync(), arm_compute::test::sync_if_necessary(), arm_compute::test::validation::TEST_CASE(), OpenCLClock< output_timestamps >::test_measurements(), CLArray< cl_int >::unmap(), CLSubTensor::unmap(), CLTensor::unmap(), CLBufferMemoryRegion::unmap(), ClMatMul::validate(), ClGemm::validate(), ClGemmLowpMatrixMultiplyCore::validate(), CLDepthwiseConvolutionLayer::validate(), ClConv2d::validate(), CLGenerateProposalsLayer::validate(), and CLConvolutionLayer::validate().
void init | ( | cl::Context | context, |
cl::CommandQueue | queue, | ||
const cl::Device & | device, | ||
ICLTuner * | cl_tuner = nullptr , |
||
CLGEMMHeuristicsHandle * | gemm_h = nullptr , |
||
CLBackendType | cl_backend_type = CLBackendType::Native |
||
) |
Initialises the context and command queue to be used by the scheduler.
[in] | context | A CL context. |
[in] | queue | A CL command queue. |
[in] | device | A CL device. |
[in] | cl_tuner | (Optional) Pointer to OpenCL tuner (default=nullptr). Note: It is the caller's responsibility to release the allocated memory for CLTuner. |
[in] | gemm_h | (Optional) Pointer to CLGEMMHeuristicsHandle (default = nullptr) |
[in] | cl_backend_type | (Optional) Type of backend to use (default = CLBackendType::Native) |
Definition at line 154 of file CLScheduler.cpp.
References ARM_COMPUTE_ERROR_ON_MSG, ITensorPack::empty(), arm_compute::get_target_from_device(), ICLKernel::run(), ICLKernel::run_op(), CLScheduler::set_context(), ICLTuner::tune_kernel_dynamic(), and IKernel::window().
Referenced by CLScheduler::default_init(), and CLScheduler::default_init_with_context().
bool is_initialised | ( | ) | const |
Definition at line 90 of file CLScheduler.cpp.
Referenced by arm_compute::restore_program_cache_from_file().
|
delete |
Prevent instances of this class from being copied (as this class contains pointers).
cl::CommandQueue & queue | ( | ) |
Accessor for the associated CL command queue.
Definition at line 39 of file CLScheduler.cpp.
References ARM_COMPUTE_ERROR_ON.
Referenced by CLTuner::add_tuning_params(), ClQueue::cl_queue(), arm_compute::test::validation::DATA_TEST_CASE(), CLScheduler::default_init(), CLScheduler::default_init_with_context(), ClQueue::finish(), CLTensor::map(), OpenCLClock< output_timestamps >::OpenCLClock(), ClGemmLowpMatrixMultiplyCore::prepare(), ClWinogradConv2d::prepare(), CLFFTConvolutionLayer::prepare(), CLQLSTMLayer::prepare(), CLSplit::run(), Framework::run(), CLTensorAllocator::set_global_allocator(), arm_compute::test::validation::TEST_CASE(), OpenCLClock< output_timestamps >::test_measurements(), and CLTensor::unmap().
void set_context | ( | cl::Context | context | ) |
Accessor to set the CL context to be used by the scheduler.
[in] | context | A CL context. |
Definition at line 148 of file CLScheduler.cpp.
References CLKernelLibrary::get(), and CLKernelLibrary::set_context().
Referenced by CLScheduler::init(), Framework::run(), and ClContext::set_cl_ctx().
void set_queue | ( | cl::CommandQueue | queue | ) |
Accessor to set the CL command queue to be used by the scheduler.
[in] | queue | A CL command queue. |
Definition at line 55 of file CLScheduler.cpp.
Referenced by OpenCLClock< output_timestamps >::OpenCLClock(), Framework::run(), and ClQueue::set_cl_queue().
void set_target | ( | GPUTarget | target | ) |
Accessor to set target GPU to be used by the scheduler.
[in] | target | The target GPU. |
Definition at line 60 of file CLScheduler.cpp.
References CLScheduler::target().
void set_tuner | ( | ICLTuner * | tuner | ) |
Accessor to set the CL tuner to be used by the scheduler.
[in] | tuner | A CL tuner |
Definition at line 65 of file CLScheduler.cpp.
void sync | ( | ) |
Blocks until all commands in the associated command queue have finished.
Definition at line 70 of file CLScheduler.cpp.
Referenced by CLCropResize::configure(), main(), CLCropResize::run(), CLDeviceBackend::sync(), arm_compute::test::sync_if_necessary(), and arm_compute::test::validation::TEST_CASE().
GPUTarget target | ( | ) | const |
Get the target GPU.
Definition at line 45 of file CLScheduler.cpp.
Referenced by ClMatMul::configure(), CLDepthwiseConvolutionLayer::configure(), ClGemm::configure(), ClGemmLowpMatrixMultiplyCore::configure(), CLConvolutionLayer::configure(), CLScheduler::set_target(), ClMatMul::validate(), ClGemm::validate(), ClGemmLowpMatrixMultiplyCore::validate(), CLDepthwiseConvolutionLayer::validate(), ClConv2d::validate(), and CLConvolutionLayer::validate().
void tune_kernel_static | ( | ICLKernel & | kernel | ) |
Tunes OpenCL kernel.
[in] | kernel | Kernel to tune |
Definition at line 82 of file CLScheduler.cpp.
References ICLTuner::tune_kernel_static().
Referenced by ClScale::configure(), ClPool2d::configure(), ClPool3d::configure(), ClDirectConv2d::configure(), CLRange::configure(), and ClGemmConv2d::configure().