40 std::pair<Coordinates, Coordinates> compute_start_end_slice_coordinates(
const ITensorInfo &
output_info,
const PadStrideInfo &deconv_info,
bool is_nchw)
47 start.set(0, deconv_info.pad_left());
48 start.set(1, deconv_info.pad_top());
49 end.set(0, output_info.dimension(0) - deconv_info.pad_right());
50 end.set(1, output_info.dimension(1) - deconv_info.pad_bottom());
55 start.set(1, deconv_info.pad_left());
56 start.set(2, deconv_info.pad_top());
58 end.set(0, output_info.dimension(0));
59 end.set(1, output_info.dimension(1) - deconv_info.pad_right());
60 end.set(2, output_info.dimension(2) - deconv_info.pad_bottom());
63 return { start, end };
65 Status construct_gemmlowp_output_stage(
const ITensorInfo *
input,
const ITensorInfo *weights,
const ITensorInfo *output, GEMMLowpOutputStageInfo &output_stage_info)
67 const auto data_type = input->data_type();
71 const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
72 const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
73 const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
75 float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
76 int output_multiplier(0);
81 output_stage_info.gemmlowp_multiplier = output_multiplier;
82 output_stage_info.gemmlowp_shift = output_shift;
83 output_stage_info.gemmlowp_offset = oq_info.offset;
85 output_stage_info.gemmlowp_min_bound = (std::get<0>(min_max_bound)).get<int32_t>();
86 output_stage_info.gemmlowp_max_bound = (std::get<1>(min_max_bound)).get<int32_t>();
87 output_stage_info.output_data_type =
data_type;
95 : _memory_group(
std::move(memory_manager)),
98 _gemmlowp_output_stage(),
99 _permute_input_to_nhwc(),
100 _permute_weights_to_nhwc(),
102 _transpose_weights(),
107 _reshaped_weights_t(),
114 _padded_input(false),
149 TensorInfo nhwc_input_info = input->
clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(nhwc_input_shape).set_data_layout(
DataLayout::NCHW);
151 TensorInfo nhwc_weights_info = weights->
clone()->set_is_resizable(
true).reset_padding().set_tensor_shape(nhwc_weights_shape).set_data_layout(
DataLayout::NCHW);
157 const TensorShape reshaped_shape =
TensorShape(nhwc_weights_shape[0], nhwc_weights_shape[1] * nhwc_weights_shape[2] * nhwc_weights_shape[3]);
161 TensorShape transposed_shape(reshaped_shape[1], reshaped_shape[0]);
162 const TensorInfo reshaped_t_info = reshaped_info.
clone()->set_is_resizable(
true).set_tensor_shape(transposed_shape);
170 TensorInfo gemm_output_info = reshaped_t_info.
clone()->set_tensor_shape(gemm_output_shape).set_is_resizable(
true);
189 TensorInfo col2im_output_info = gemm_output_info.
clone()->set_tensor_shape(deconv_shape).set_is_resizable(
true);
191 if(padded_input && is_quantized)
193 const auto start_end = compute_start_end_slice_coordinates(col2im_output_info, deconv_info, is_nchw);
198 else if(padded_input)
200 const auto start_end = compute_start_end_slice_coordinates(col2im_output_info, deconv_info, is_nchw);
204 else if(is_quantized)
228 bias !=
nullptr ? bias->
info() :
nullptr,
232 _original_weights = weights;
238 const ICLTensor *weights_to_use = weights;
246 _memory_group.
manage(&_permuted_input);
251 input_to_use = &_permuted_input;
252 weights_to_use = &_permuted_weights;
261 _reshape_weights.
configure(compile_context, weights_to_use, &_reshaped_weights);
262 _transpose_weights.
configure(compile_context, &_reshaped_weights, &_reshaped_weights_t);
278 _mm_gemmlowp.
configure(compile_context, input_to_use, &_reshaped_weights_t,
nullptr, &_gemm_output, gemm_info);
280 input_to_use->info()->set_quantization_info(iq_info);
285 _mm_gemm.
configure(compile_context, input_to_use, &_reshaped_weights_t,
nullptr, &_gemm_output, 1.f, 0.0f, gemm_info);
293 ICLTensor *deconv_reshape_output =
nullptr;
295 ICLTensor *output_stage_output =
nullptr;
297 if(_padded_input && _is_quantized)
299 _memory_group.
manage(&_slice_gemm_input);
300 _memory_group.
manage(&_gemmlowp_final);
301 deconv_reshape_output = &_gemmlowp_final;
302 output_stage_output = &_slice_gemm_input;
303 slice_output = output;
305 else if(_padded_input)
307 _memory_group.
manage(&_slice_gemm_input);
308 deconv_reshape_output = &_slice_gemm_input;
309 slice_output = output;
311 else if(_is_quantized)
313 _memory_group.
manage(&_gemmlowp_final);
314 deconv_reshape_output = &_gemmlowp_final;
315 output_stage_output = output;
319 deconv_reshape_output = output;
323 _deconv_reshape->configure(compile_context, &_gemm_output, bias, deconv_reshape_output, input->
info(), weights->
info(), deconv_info);
329 construct_gemmlowp_output_stage(input->
info(), weights->
info(), output->
info(), output_stage_info);
330 _gemmlowp_output_stage.
configure(compile_context, &_gemmlowp_final,
nullptr, output_stage_output, output_stage_info);
337 const auto start_end = compute_start_end_slice_coordinates(*deconv_reshape_output->
info(), deconv_info, _is_nchw);
338 _slice_gemm.
configure(compile_context, &_slice_gemm_input, slice_output, start_end.first, start_end.second);
351 _permute_input_to_nhwc.
run();
367 _gemmlowp_output_stage.
run();
385 _permute_weights_to_nhwc.
run();
389 _reshape_weights.
run();
397 _transpose_weights.
run();
410 if(!_reshaped_weights_t.
is_used())
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info)
Set the input, weights, biases and output tensors.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLReshapeLayer.
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info)
Static function to check if given info will lead to a valid configuration of CLDeconvolutionReshapeOu...
Quantize using a fixed point multiplication.
void prepare() override
Prepare the function for executing.
std::unique_ptr< ITensorInfo > clone() const override
Provide a clone of the current object of class T.
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
void prepare() override
Prepare the function for executing.
void run() override
Run the kernels contained in the function.
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.
static CLScheduler & get()
Access the scheduler singleton.
std::pair< unsigned int, unsigned int > deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height, unsigned int kernel_width, unsigned int kernel_height, const PadStrideInfo &pad_stride_info)
Returns expected width and height of the deconvolution's output tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &deconv_info)
Static function to check if given info will lead to a valid configuration of CLDeconvolutionLayer.
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
virtual DataType data_type() const =0
Data type used for each element of the tensor.
bool is_used() const
Flags if the tensor is used or not.
CLGEMMDeconvolutionLayer(std::shared_ptr< IMemoryManager > memory_manager=nullptr)
Constructor.
1 channel, 1 F32 per channel
ITensorInfo & set_data_type(DataType data_type) override
Set the data type to the specified value.
Strides PermutationVector
Permutation vector.
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info)
Initialise the kernel's inputs, output.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
const DataLayout data_layout
void run() override
Run the kernels contained in the function.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
Store the tensor's metadata.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs, output.
unsigned int pad_top() const
Get the top padding.
Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon=false)
Calculate quantized representation of multiplier.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2021 Arm Limited.
void run() override
Run the kernels contained in the function.
1 channel, 1 F16 per channel
void run() override
Run the kernels contained in the function.
ITensorInfo & set_quantization_info(const QuantizationInfo &quantization_info) override
Set the quantization settings (scale and offset) of the tensor.
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info)
Static function to check if given info will lead to a valid configuration of opencl::kernels::ClGemmL...
TensorShape compute_deconvolution_output_shape(const std::pair< unsigned int, unsigned int > &out_dims, const ITensorInfo &input, const ITensorInfo &weights)
Calculate the output shape of the deconvolution layer.
void mark_as_unused() const
Marks a tensor as unused.
1 channel, 1 S32 per channel
void manage(IMemoryManageable *obj) override
Sets a object to be managed by the given memory group.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
Quantization information.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMMLowpMatrixMultiply...
void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Initialise the kernel's inputs and output.
void run() override
Run the kernels contained in the function.
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.
quantized, asymmetric fixed-point 8-bit number unsigned
void run() override
Run the kernels contained in the function.
std::pair< unsigned int, unsigned int > stride() const
Get the stride.
UniformQuantizationInfo uniform() const
Return per layer quantization info.
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.
GEMMLowp output stage info.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends)
Static function to check if given info will lead to a valid configuration of CLSlice.
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
unsigned int pad_right() const
Get the right padding.
Padding and stride information class.
void end(TokenStream &in, bool &valid)
virtual QuantizationInfo quantization_info() const =0
Get the quantization settings (scale and offset) of the tensor.
void enqueue(ICLKernel &kernel, bool flush=true)
Schedule the execution of the passed kernel if possible.
Num samples, channels, height, width.
void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends)
Configure kernel.
bool is_data_type_quantized_asymmetric(DataType dt)
Check if a given data type is of asymmetric quantized type.
Interface for the OpenCL kernel to be used for reshaping the tensor before returning the result of de...
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
Memory group resources scope handling class.
Interface for OpenCL tensor.
static Status validate(const ITensorInfo *input, const ITensorInfo *output)
Static function to check if given info will lead to a valid configuration of CLTranspose.
void run() override
Run the kernels contained in the function.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
void free() override
Free allocated OpenCL memory.
static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info=GEMMInfo())
Static function to check if given info will lead to a valid configuration of CLGEMM.
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Store the tensor's metadata.
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
Set the input and output tensors.
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and outputs.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
Static function to check if given info will lead to a valid configuration of CLPermute.
quantized, asymmetric fixed-point 8-bit number signed
void configure(const ICLTensor *input, ICLTensor *output)
Initialise the kernel's inputs and output.
size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension)
Get the index of the given dimension.
unsigned int pad_bottom() const
Get the bottom padding.
unsigned int pad_left() const
Get the left padding.
void prepare() override
Prepare the function for executing.
DataLayout
[DataLayout enum definition]
std::tuple< PixelValue, PixelValue > get_min_max(DataType dt)
Compute the minimum and maximum values a data type can take.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
~CLGEMMDeconvolutionLayer()
Default destructor.