24.04
|
Go to the documentation of this file.
34 namespace cl_indirect_conv
50 &ClIndirectConvDefaultConfigValhall::configure_G77_f32, &ClIndirectConvDefaultConfigValhall::configure_G77_f16);
56 ConfigurationFunctionExecutorPtr func = configs_G77.
get_function(
src->data_type());
72 const int32_t stride_x =
conv_info.stride().first;
73 const int32_t stride_y =
conv_info.stride().second;
107 DirectConvComputeKernelInfo ClIndirectConvDefaultConfigValhall::configure_G77_f16(
const ITensorInfo *
src,
108 const ITensorInfo *wei,
111 DirectConvComputeKernelInfo desc;
115 const TensorShape wei_shape = wei->tensor_shape();
121 const int32_t k = wei_shape[0];
123 desc.export_weights_to_cl_image = export_weights_to_cl_image;
133 else if ((k % 8) == 0)
149 if (m >= 16000 && k < 4)
Basic interface for the indirect convolution kernel configuration.
SimpleTensor< float > src
bool export_to_cl_image(const ITensorInfo *tensor)
@ NHWC
Num samples, height, width, channels.
Valhall based OpenCL indirect convolution configuration.
Basic container for the OpenCL indirect convolution configuration functions.
int32_t n0
Number of columns to be processed by the kernel.
#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg)
int32_t m0
Number of rows to be processed by the kernel.
T get_function(DataType data_type)
Method to return the indirect convolution configuration function based on data type.
GPUTarget
Available GPU Targets.
Copyright (c) 2017-2024 Arm Limited.
bool export_weights_to_cl_image
Flag to export the weights to cl_image.
Store the tensor's metadata.
ClIndirectConvDefaultConfigValhall(GPUTarget gpu)
Constructor.
TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, DataLayout input_data_layout, const TensorShape &weights_shape, const PadStrideInfo &conv_info)
Calculate the output tensor shape of a deep convolution.
int32_t k0
Number of partial accumulations to be processed in a single iteration by the kernel.
DirectConvComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info) override
This method returns the DirectConvComputeKernelInfo for the given inputs.
Compute descriptor used by the direct convolution kernel.