53 std::pair<Coordinates, Coordinates> compute_start_end_slice_coordinates(
const ITensorInfo &
output_info,
const PadStrideInfo &deconv_info,
bool is_nchw)
60 start.set(0, deconv_info.pad_left());
61 start.set(1, deconv_info.pad_top());
68 start.set(1, deconv_info.pad_left());
69 start.set(2, deconv_info.pad_top());
76 return { start,
end };
78 Status construct_gemmlowp_output_stage(
const ITensorInfo *
input,
const ITensorInfo *weights,
const ITensorInfo *output, GEMMLowpOutputStageInfo &output_stage_info)
84 const UniformQuantizationInfo iq_info =
input->quantization_info().uniform();
85 const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
86 const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
88 float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
89 int output_multiplier(0);
94 output_stage_info.gemmlowp_multiplier = output_multiplier;
95 output_stage_info.gemmlowp_shift = output_shift;
96 output_stage_info.gemmlowp_offset = oq_info.offset;
98 output_stage_info.gemmlowp_min_bound = (std::get<0>(min_max_bound)).get<int32_t>();
99 output_stage_info.gemmlowp_max_bound = (std::get<1>(min_max_bound)).get<int32_t>();
100 output_stage_info.output_data_type =
data_type;
108 : _memory_group(std::move(memory_manager)),
111 _gemmlowp_output_stage(),
112 _permute_input_to_nhwc(),
113 _permute_weights_to_nhwc(),
115 _transpose_weights(),
120 _reshaped_weights_t(),
127 _padded_input(false),
162 TensorInfo nhwc_input_info =
input->clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(nhwc_input_shape).set_data_layout(
DataLayout::NCHW);
164 TensorInfo nhwc_weights_info = weights->
clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(nhwc_weights_shape).set_data_layout(
DataLayout::NCHW);
170 const TensorShape reshaped_shape =
TensorShape(nhwc_weights_shape[0], nhwc_weights_shape[1] * nhwc_weights_shape[2] * nhwc_weights_shape[3]);
174 TensorShape transposed_shape(reshaped_shape[1], reshaped_shape[0]);
175 const TensorInfo reshaped_t_info = reshaped_info.
clone()->set_is_resizable(
true).set_tensor_shape(transposed_shape);
179 input->dimension(idx_w),
180 input->dimension(idx_h),
181 input->dimension(idx_b));
183 TensorInfo gemm_output_info = reshaped_t_info.
clone()->set_tensor_shape(gemm_output_shape).set_is_resizable(
true);
184 GEMMInfo gemm_info(
false,
false,
true,
input->dimension(idx_h),
true);
202 TensorInfo col2im_output_info = gemm_output_info.
clone()->set_tensor_shape(deconv_shape).set_is_resizable(
true);
204 if(padded_input && is_quantized)
206 const auto start_end = compute_start_end_slice_coordinates(col2im_output_info, deconv_info, is_nchw);
211 else if(padded_input)
213 const auto start_end = compute_start_end_slice_coordinates(col2im_output_info, deconv_info, is_nchw);
217 else if(is_quantized)
241 bias !=
nullptr ? bias->
info() :
nullptr,
245 _original_weights = weights;
251 const ICLTensor *weights_to_use = weights;
259 _memory_group.
manage(&_permuted_input);
264 input_to_use = &_permuted_input;
265 weights_to_use = &_permuted_weights;
274 _reshape_weights.
configure(compile_context, weights_to_use, &_reshaped_weights);
275 _transpose_weights.
configure(compile_context, &_reshaped_weights, &_reshaped_weights_t);
278 GEMMInfo gemm_info(
false,
false,
true,
input->info()->dimension(idx_h),
true);
291 _mm_gemmlowp.
configure(compile_context, input_to_use, &_reshaped_weights_t,
nullptr, &_gemm_output, gemm_info);
293 input_to_use->info()->set_quantization_info(iq_info);
298 _mm_gemm.
configure(compile_context, input_to_use, &_reshaped_weights_t,
nullptr, &_gemm_output, 1.f, 0.0f, gemm_info);
306 ICLTensor *deconv_reshape_output =
nullptr;
308 ICLTensor *output_stage_output =
nullptr;
310 if(_padded_input && _is_quantized)
312 _memory_group.
manage(&_slice_gemm_input);
313 _memory_group.
manage(&_gemmlowp_final);
314 deconv_reshape_output = &_gemmlowp_final;
315 output_stage_output = &_slice_gemm_input;
316 slice_output = output;
318 else if(_padded_input)
320 _memory_group.
manage(&_slice_gemm_input);
321 deconv_reshape_output = &_slice_gemm_input;
322 slice_output = output;
324 else if(_is_quantized)
326 _memory_group.
manage(&_gemmlowp_final);
327 deconv_reshape_output = &_gemmlowp_final;
328 output_stage_output = output;
332 deconv_reshape_output = output;
336 _deconv_reshape->configure(compile_context, &_gemm_output, bias, deconv_reshape_output,
input->info(), weights->
info(), deconv_info);
342 construct_gemmlowp_output_stage(
input->info(), weights->
info(), output->
info(), output_stage_info);
343 _gemmlowp_output_stage.
configure(compile_context, &_gemmlowp_final,
nullptr, output_stage_output, output_stage_info);
350 const auto start_end = compute_start_end_slice_coordinates(*deconv_reshape_output->
info(), deconv_info, _is_nchw);
351 _slice_gemm.
configure(compile_context, &_slice_gemm_input, slice_output, start_end.first, start_end.second);
364 _permute_input_to_nhwc.
run();
380 _gemmlowp_output_stage.
run();
398 _permute_weights_to_nhwc.
run();
402 _reshape_weights.
run();
410 _transpose_weights.
run();
423 if(!_reshaped_weights_t.
is_used())
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info)
Set the input, weights, biases and output tensors.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLReshapeLayer.
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info)
Static function to check if given info will lead to a valid configuration of CLDeconvolutionReshapeOutputKernel.
Quantize using a fixed point multiplication.
void prepare() override
Prepare the function for executing.
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
void prepare() override
Prepare the function for executing.
void run() override
Run the kernels contained in the function.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
static CLScheduler & get()
Access the scheduler singleton.
std::pair< unsigned int, unsigned int > deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height, unsigned int kernel_width, unsigned int kernel_height, const PadStrideInfo &pad_stride_info)
Returns expected width and height of the deconvolution's output tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &deconv_info)
Static function to check if given info will lead to a valid configuration of CLDeconvolutionLayer.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
bool is_used() const
Flags if the tensor is used or not.
CLGEMMDeconvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
1 channel, 1 F32 per channel
ITensorInfo & set_data_type(DataType data_type) override
Set the data type to the specified value.
Strides PermutationVector
Permutation vector.
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info)
Initialise the kernel's inputs, output.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
const DataLayout data_layout
void run() override
Run the kernels contained in the function.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
unsigned int pad_top() const
Get the top padding.
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
void run() override
Run the kernels contained in the function.
1 channel, 1 F16 per channel
void run() override
Run the kernels contained in the function.
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info)
Static function to check if given info will lead to a valid configuration of CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.
TensorShape compute_deconvolution_output_shape(const std::pair< unsigned int, unsigned int > &out_dims, const ITensorInfo &input, const ITensorInfo &weights)
Calculate the output shape of the deconvolution layer.
void mark_as_unused() const
Marks a tensor as unused.
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
Quantization information.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixMultiplyCore.
void run() override final
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number unsigned
void run() override
Run the kernels contained in the function.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends)
Static function to check if given info will lead to a valid configuration of CLSlice.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
unsigned int pad_right() const
Get the right padding.
Padding and stride information class.
void end(TokenStream &in, bool &valid)
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
Num samples, channels, height, width.
void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Interface for the OpenCL kernel to be used for reshaping the tensor before returning the result of deconvolution.
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
Memory group resources scope handling class.
Interface for OpenCL tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLTranspose.
void run() override
Run the kernels contained in the function.
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs and output.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void free() override
Free allocated OpenCL memory.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMM.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
Set the input and output tensors.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and outputs.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration of CLPermute.
quantized, asymmetric fixed-point 8-bit number signed
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and output.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
unsigned int pad_bottom() const
Get the bottom padding.
unsigned int pad_left() const
Get the left padding.
void prepare() override
Prepare the function for executing.
DataLayout
[DataLayout enum definition]
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
~CLGEMMDeconvolutionLayer()
Default destructor.