// File-level quantization presets, each constructed from two arguments.
// NOTE(review): the arguments are presumably (scale, zero-point offset) --
// confirm against the QuantizationInfo constructor.
//
// qasymm: first argument 1/128 (= 2^-7), second argument 128. Passed to
// set_quantization_info() further down in this file.
42 const QuantizationInfo
qasymm(1.f / 128.f, 128);
// qsymm_3: first argument 8/32768 (= 2^-12), second argument 0.
// NOTE(review): the numeric suffix looks like the number of integer bits of a
// 16-bit symmetric fixed-point format -- confirm.
43 const QuantizationInfo
qsymm_3(8.f / 32768.f, 0);
// qsymm_4: first argument 16/32768 (= 2^-11), second argument 0. Passed to
// set_quantization_info() further down in this file.
44 const QuantizationInfo
qsymm_4(16.f / 32768.f, 0);
// qsymm_0: first argument 1/32768 (= 2^-15), second argument 0.
45 const QuantizationInfo qsymm_0(1.f / 32768.f, 0);
49 : _memory_group(std::move(memory_manager)),
53 _concat_input_weights(),
54 _concat_recurrent_weights(),
58 _sigmoid_forget_gate(),
59 _sigmoid_input_gate(),
60 _sigmoid_output_gate(),
61 _tanh_modulation_gate(),
63 _add_cell_state_tmps(),
65 _mul_forget_gate_cell_state(),
66 _mul_input_gate_input_mod_gate(),
67 _mul_output_state_tmp_output_gate(),
68 _slice_input_tensor(),
69 _slice_forget_tensor(),
71 _slice_output_tensor(),
74 _input_to_input_weights(nullptr),
75 _input_to_forget_weights(nullptr),
76 _input_to_cell_weights(nullptr),
77 _input_to_output_weights(nullptr),
78 _recurrent_to_input_weights(nullptr),
79 _recurrent_to_forget_weights(nullptr),
80 _recurrent_to_cell_weights(nullptr),
81 _recurrent_to_output_weights(nullptr),
82 _input_gate_bias(nullptr),
83 _forget_gate_bias(nullptr),
85 _output_gate_bias(nullptr),
90 _weights_transposed(),
97 _input_modulation_gate_input(),
98 _forget_gate_output(),
100 _output_gate_output(),
101 _input_modulation_gate_output(),
105 _output_state_out_symm(),
106 _output_state_out_f32(),
132 output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
158 cell_state_out, output_state_out);
163 cell_bias,
output_gate_bias, cell_state_in, output_state_in, cell_state_out,
171 output_state_in->
info(), cell_state_out->
info(), output_state_out->
info()));
174 const int batch_size =
input->info()->dimension(1);
194 _cell_bias = cell_bias;
198 std::vector<const ICLTensor *> inputs_weights_vector;
204 std::vector<const ICLTensor *> recurrent_weights_vector;
212 _concat_input_weights.
configure(compile_context, inputs_weights_vector, &_input_weights,
Window::DimY);
216 _concat_recurrent_weights.
configure(compile_context, recurrent_weights_vector, &_recurrent_weights,
Window::DimY);
218 std::vector<const ICLTensor *> weights_vector;
219 weights_vector.emplace_back(&_recurrent_weights);
220 weights_vector.emplace_back(&_input_weights);
225 _transpose_weights.
configure(compile_context, &_weights, &_weights_transposed);
228 std::vector<const ICLTensor *> input_vector;
229 input_vector.emplace_back(
input);
230 input_vector.emplace_back(output_state_in);
232 _memory_group.
manage(&_input);
238 std::vector<const ICLTensor *> bias_vector;
241 bias_vector.emplace_back(cell_bias);
253 _memory_group.
manage(&_output_highp);
255 _gemmlowp.
configure(compile_context, &_input, &_weights_transposed,
nullptr, &_output_highp);
267 int output_multiplier = 0;
268 int output_shift = 0;
271 _memory_group.
manage(&_output_lowp);
275 info.gemmlowp_multiplier = output_multiplier;
276 info.gemmlowp_shift = output_shift;
278 _output_stage.
configure(compile_context, &_output_highp, &_bias, &_output_lowp,
info);
285 _memory_group.
manage(&_input_gate_input);
286 _slice_input_tensor.
configure(compile_context, &_output_lowp, &_input_gate_input, {0, 0},
288 _memory_group.
manage(&_forget_gate_input);
289 _slice_forget_tensor.
configure(compile_context, &_output_lowp, &_forget_gate_input, {
output_size, 0},
291 _memory_group.
manage(&_input_modulation_gate_input);
292 _slice_cell_tensor.
configure(compile_context, &_output_lowp, &_input_modulation_gate_input,
294 _memory_group.
manage(&_output_gate_input);
295 _slice_output_tensor.
configure(compile_context, &_output_lowp, &_output_gate_input, {3 *
output_size, 0},
301 _memory_group.
manage(&_input_gate_input);
302 _slice_input_tensor.
configure(compile_context, &_output_lowp, &_input_gate_input, {0}, {
output_size});
303 _memory_group.
manage(&_forget_gate_input);
304 _slice_forget_tensor.
configure(compile_context, &_output_lowp, &_forget_gate_input, {
output_size},
306 _memory_group.
manage(&_input_modulation_gate_input);
307 _slice_cell_tensor.
configure(compile_context, &_output_lowp, &_input_modulation_gate_input, {2 *
output_size},
309 _memory_group.
manage(&_output_gate_input);
310 _slice_output_tensor.
configure(compile_context, &_output_lowp, &_output_gate_input, {3 *
output_size},
316 _memory_group.
manage(&_forget_gate_output);
319 _sigmoid_forget_gate.
configure(compile_context, &_forget_gate_input, &_forget_gate_output,
324 _memory_group.
manage(&_input_gate_output);
327 _sigmoid_input_gate.
configure(compile_context, &_input_gate_input, &_input_gate_output,
332 _memory_group.
manage(&_input_modulation_gate_output);
335 _tanh_modulation_gate.
configure(compile_context, &_input_modulation_gate_input, &_input_modulation_gate_output,
340 _memory_group.
manage(&_output_gate_output);
343 _sigmoid_output_gate.
configure(compile_context, &_output_gate_input, &_output_gate_output,
348 _memory_group.
manage(&_cell_state_tmp1);
351 _mul_forget_gate_cell_state.
configure(compile_context, &_forget_gate_output, cell_state_in, &_cell_state_tmp1, 1,
355 _memory_group.
manage(&_cell_state_tmp2);
358 _mul_input_gate_input_mod_gate.
configure(compile_context, &_input_gate_output, &_input_modulation_gate_output,
363 _add_cell_state_tmps.
configure(compile_context, &_cell_state_tmp1, &_cell_state_tmp2, cell_state_out,
369 _memory_group.
manage(&_output_state_tmp);
372 _tanh_output_state.
configure(compile_context, cell_state_out, &_output_state_tmp,
375 _memory_group.
manage(&_output_state_out_symm);
378 _mul_output_state_tmp_output_gate.
configure(compile_context, &_output_state_tmp, &_output_gate_output,
385 _memory_group.
manage(&_output_state_out_f32);
388 _dequantize.
configure(compile_context, &_output_state_out_symm, &_output_state_out_f32);
391 _quantize.
configure(compile_context, &_output_state_out_f32, output_state_out);
421 const int batch_size =
input->dimension(1);
441 .set_quantization_info(
qasymm));
445 .set_quantization_info(
qsymm_4));
480 std::vector<const ITensorInfo *> inputs_weights_vector;
490 std::vector<const ITensorInfo *> recurrent_weights_vector;
500 std::vector<const ITensorInfo *> weights_vector;
501 weights_vector.emplace_back(&recurrent_weights);
502 weights_vector.emplace_back(&input_weights);
507 TensorInfo weights_transposed = weights.
clone()->set_is_resizable(
true).set_tensor_shape(weights_transposed_shape);
511 std::vector<const ITensorInfo *> input_vector;
512 input_vector.emplace_back(
input);
513 input_vector.emplace_back(output_state_in);
518 std::vector<const ITensorInfo *> bias_vector;
521 bias_vector.emplace_back(cell_bias);
543 int output_multiplier = 0;
544 int output_shift = 0;
551 info.gemmlowp_multiplier = output_multiplier;
552 info.gemmlowp_shift = output_shift;
607 &input_gate_input, &input_gate_output,
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
679 _concat_inputs.
run();
686 _slice_input_tensor.
run();
687 _slice_forget_tensor.
run();
688 _slice_cell_tensor.
run();
689 _slice_output_tensor.
run();
693 _sigmoid_forget_gate.
run();
696 _sigmoid_input_gate.
run();
699 _tanh_modulation_gate.
run();
702 _sigmoid_output_gate.
run();
705 _mul_forget_gate_cell_state.
run();
706 _mul_input_gate_input_mod_gate.
run();
707 _add_cell_state_tmps.
run();
710 _tanh_output_state.
run();
711 _mul_output_state_tmp_output_gate.
run();
723 _concat_input_weights.
run();
731 _concat_recurrent_weights.
run();
738 _concat_weights.
run();
746 _transpose_weights.
run();