24.04
|
Go to the documentation of this file.
43 std::pair<Coordinates, Coordinates>
44 compute_start_end_slice_coordinates(
const ITensorInfo &
output_info,
const PadStrideInfo &deconv_info,
bool is_nchw)
51 start.set(0, deconv_info.pad_left());
52 start.set(1, deconv_info.pad_top());
59 start.set(1, deconv_info.pad_left());
60 start.set(2, deconv_info.pad_top());
69 Status construct_gemmlowp_output_stage(
const ITensorInfo *
input,
70 const ITensorInfo *weights,
71 const ITensorInfo *output,
72 GEMMLowpOutputStageInfo &output_stage_info)
78 const UniformQuantizationInfo iq_info =
input->quantization_info().uniform();
79 const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
80 const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
82 float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
83 int output_multiplier(0);
89 output_stage_info.gemmlowp_multiplier = output_multiplier;
90 output_stage_info.gemmlowp_shift = output_shift;
91 output_stage_info.gemmlowp_offset = oq_info.offset;
93 output_stage_info.gemmlowp_min_bound = (std::get<0>(min_max_bound)).get<int32_t>();
94 output_stage_info.gemmlowp_max_bound = (std::get<1>(min_max_bound)).get<int32_t>();
95 output_stage_info.output_data_type =
data_type;
103 : _memory_group(std::move(memory_manager)),
106 _gemmlowp_output_stage(),
107 _permute_input_to_nhwc(),
108 _permute_weights_to_nhwc(),
110 _transpose_weights(),
115 _reshaped_weights_t(),
122 _padded_input(false),
143 const bool padded_input = deconv_info.
pad_bottom() > 0 || deconv_info.
pad_left() > 0 ||
164 ->set_is_resizable(
true)
166 .set_tensor_shape(nhwc_input_shape)
170 ->set_is_resizable(
true)
172 .set_tensor_shape(nhwc_weights_shape)
180 TensorShape(nhwc_weights_shape[0], nhwc_weights_shape[1] * nhwc_weights_shape[2] * nhwc_weights_shape[3]);
182 weights->
clone()->set_tensor_shape(reshaped_shape).set_data_layout(
DataLayout::NCHW).set_is_resizable(
true);
185 TensorShape transposed_shape(reshaped_shape[1], reshaped_shape[0]);
186 const TensorInfo reshaped_t_info = reshaped_info.
clone()->set_is_resizable(
true).set_tensor_shape(transposed_shape);
190 input->dimension(idx_w),
input->dimension(idx_h),
input->dimension(idx_b));
192 TensorInfo gemm_output_info = reshaped_t_info.
clone()->set_tensor_shape(gemm_output_shape).set_is_resizable(
true);
193 GEMMInfo gemm_info(
false,
false,
true,
input->dimension(idx_h),
true);
200 &
input->clone()->set_tensor_shape(nhwc_input_shape), &reshaped_t_info,
nullptr,
208 &reshaped_t_info,
nullptr, &gemm_output_info, 1.0f, 0.0f, gemm_info));
216 TensorInfo col2im_output_info = gemm_output_info.
clone()->set_tensor_shape(deconv_shape).set_is_resizable(
true);
218 if (padded_input && is_quantized)
220 const auto start_end = compute_start_end_slice_coordinates(col2im_output_info, deconv_info, is_nchw);
222 &gemm_output_info,
bias, &col2im_output_info,
input, weights, deconv_info));
224 &col2im_output_info,
nullptr,
225 &col2im_output_info.
clone()->set_is_resizable(
true).set_data_type(
input->data_type()), output_stage_info));
228 output, start_end.first, start_end.second));
230 else if (padded_input)
232 const auto start_end = compute_start_end_slice_coordinates(col2im_output_info, deconv_info, is_nchw);
234 &gemm_output_info,
bias, &col2im_output_info,
input, weights, deconv_info));
237 else if (is_quantized)
240 &gemm_output_info,
bias, &col2im_output_info,
input, weights, deconv_info));
271 input->info(), weights->
info(),
bias !=
nullptr ?
bias->info() :
nullptr, output->
info(), deconv_info));
274 _original_weights = weights;
281 const ICLTensor *weights_to_use = weights;
289 _memory_group.
manage(&_permuted_input);
294 input_to_use = &_permuted_input;
295 weights_to_use = &_permuted_weights;
305 _reshape_weights.
configure(compile_context, weights_to_use, &_reshaped_weights);
306 _transpose_weights.
configure(compile_context, &_reshaped_weights, &_reshaped_weights_t);
309 GEMMInfo gemm_info(
false,
false,
true,
input->info()->dimension(idx_h),
true);
324 _mm_gemmlowp.
configure(compile_context, input_to_use, &_reshaped_weights_t,
nullptr, &_gemm_output, gemm_info);
331 _mm_gemm.
configure(compile_context, input_to_use, &_reshaped_weights_t,
nullptr, &_gemm_output, 1.f, 0.0f,
340 ICLTensor *deconv_reshape_output =
nullptr;
342 ICLTensor *output_stage_output =
nullptr;
344 if (_padded_input && _is_quantized)
346 _memory_group.
manage(&_slice_gemm_input);
347 _memory_group.
manage(&_gemmlowp_final);
348 deconv_reshape_output = &_gemmlowp_final;
349 output_stage_output = &_slice_gemm_input;
350 slice_output = output;
352 else if (_padded_input)
354 _memory_group.
manage(&_slice_gemm_input);
355 deconv_reshape_output = &_slice_gemm_input;
356 slice_output = output;
358 else if (_is_quantized)
360 _memory_group.
manage(&_gemmlowp_final);
361 deconv_reshape_output = &_gemmlowp_final;
362 output_stage_output = output;
366 deconv_reshape_output = output;
370 _deconv_reshape->configure(compile_context, &_gemm_output,
bias, deconv_reshape_output,
input->info(),
371 weights->
info(), deconv_info);
377 construct_gemmlowp_output_stage(
input->info(), weights->
info(), output->
info(), output_stage_info);
378 _gemmlowp_output_stage.
configure(compile_context, &_gemmlowp_final,
nullptr, output_stage_output,
386 const auto start_end =
387 compute_start_end_slice_coordinates(*deconv_reshape_output->
info(), deconv_info, _is_nchw);
388 _slice_gemm.
configure(compile_context, &_slice_gemm_input, slice_output, start_end.first, start_end.second);
401 _permute_input_to_nhwc.
run();
417 _gemmlowp_output_stage.
run();
435 _permute_weights_to_nhwc.
run();
439 _reshape_weights.
run();
447 _transpose_weights.
run();
460 if (!_reshaped_weights_t.
is_used())
@ NCHW
Num samples, channels, height, width.
void run() override
Run the kernels contained in the function.
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info)
Initialise the kernel's inputs, output.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLTranspose.
std::unique_ptr< ITensorInfo > clone() const override
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixMultiply...
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
Quantization information.
DataLayout
[DataLayout enum definition]
unsigned int pad_right() const
Get the right padding.
std::pair< unsigned int, unsigned int > deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height, unsigned int kernel_width, unsigned int kernel_height, const PadStrideInfo &pad_stride_info)
Returns expected width and height of the deconvolution's output tensor.
GEMMLowp output stage info.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
@ QASYMM8
quantized, asymmetric fixed-point 8-bit number unsigned
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
void run() override
Run the kernels contained in the function.
Interface for OpenCL tensor.
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info)
Set the input, weights, biases and output tensors.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration of CLPermute.
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
ITensorInfo & set_data_type(DataType data_type) override
Set the data type to the specified value.
void prepare() override
Prepare the function for executing.
void run() override
Run the kernels contained in the function.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
~CLGEMMDeconvolutionLayer()
Default destructor.
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
constexpr auto data_layout
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Strides PermutationVector
Permutation vector.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends)
Static function to check if given info will lead to a valid configuration of CLSlice.
TensorShape compute_deconvolution_output_shape(const std::pair< unsigned int, unsigned int > &out_dims, const ITensorInfo &input, const ITensorInfo &weights)
Calculate the output shape of the deconvolution layer.
void run() override
Run the kernels contained in the function.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMM.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
void run() override
Run the kernels contained in the function.
void mark_as_unused() const
Marks a tensor as unused.
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info)
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClGemmL...
Interface for the OpenCL kernel to be used for reshaping the tensor before returning the result of de...
@ QASYMM8_SIGNED
quantized, asymmetric fixed-point 8-bit number signed
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
void prepare() override
Prepare the function for executing.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and output.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
Set the input and output tensors.
static CLScheduler & get()
Access the scheduler singleton.
void run() override
Run the kernels contained in the function.
void run() override
Run the kernels contained in the function.
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
unsigned int pad_left() const
Get the left padding.
void run() override
Run the kernels contained in the function.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs and output.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &deconv_info)
Static function to check if given info will lead to a valid configuration of CLDeconvolutionLayer.
Store the tensor's metadata.
unsigned int pad_bottom() const
Get the bottom padding.
bool is_used() const
Flags if the tensor is used or not.
void prepare() override
Prepare the function for executing.
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info)
Static function to check if given info will lead to a valid configuration of CLDeconvolutionReshapeOu...
void free() override
Free allocated OpenCL memory.
Memory group resources scope handling class.
Copyright (c) 2017-2024 Arm Limited.
@ F16
16-bit floating-point number
@ S32
signed 32-bit number
void end(TokenStream &in, bool &valid)
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
CLGEMMDeconvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
Store the tensor's metadata.
@ F32
32-bit floating-point number
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and outputs.
#define ARM_COMPUTE_LOG_PARAMS(...)
@ QUANTIZE_DOWN_FIXEDPOINT
Quantize using a fixed point multiplication.
unsigned int pad_top() const
Get the top padding.
virtual ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info)=0
Set the quantization settings (scale and offset) of the tensor.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLReshapeLayer.