/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
#include "arm_compute/core/TensorInfo.h"

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/experimental/ClWorkload.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/experimental/ClCompositeOperator.h"
#include "tests/CL/CLAccessor.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/validation/CL/UNIT/dynamic_fusion/Utils.h"
#include "tests/validation/Validation.h"

#include "tests/validation/reference/ConvolutionLayer.h"
#include "tests/validation/reference/ElementwiseOperations.h"
#include "tests/validation/reference/Permute.h"

#ifdef ARM_COMPUTE_ASSERTS_ENABLED
#include "tests/SimpleTensorPrinter.h"
#endif /* ARM_COMPUTE_ASSERTS_ENABLED */

using namespace arm_compute::experimental::dynamic_fusion;
using namespace arm_compute::test::validation::utils;

namespace arm_compute
{
namespace test
{
namespace validation
{
TEST_SUITE(CL)
TEST_SUITE(INTEGRATION)
TEST_SUITE(DYNAMIC_FUSION)
TEST_CASE(Operator_Fuse_Movenet_SubGraph_1_F32, framework::DatasetMode::ALL)
{
    // Please refer to: https://confluence.arm.com/pages/viewpage.action?pageId=886243697
    /* Computation:
     * out = add_desc(addend, conv2d1x1(direct_conv)(input, weights, bias))
     */
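    // The conv2d and the elementwise add below are recorded in a single OperatorGraph,
    // which build() compiles into one fused ClWorkload executed by a ClCompositeOperator.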
    const auto data_type = DataType::F32;
    const auto data_layout = DataLayout::NHWC;
    const auto t_input_shape = TensorShape(384, 12, 12);
    // const auto t_weight_shape = TensorShape(384, 1, 1, 64);
    // const auto t_dst_shape = TensorShape(64, 12, 12);
    const auto t_weight_shape = TensorShape(384, 1, 1, 16);
    const auto t_dst_shape = TensorShape(16, 12, 12);
    auto t_input_info = TensorInfo(t_input_shape, 1, data_type, data_layout);
    auto t_weight_info = TensorInfo(t_weight_shape, 1, data_type, data_layout);
    auto t_l1_addend_info = TensorInfo(t_dst_shape, 1, data_type, data_layout);
    auto t_acc_info = TensorInfo(); // Intermediate tensor for cond3
    auto t_dst_info = TensorInfo();

    Conv2dDescriptor conv2d_desc{};
    ElementwiseDescriptor add_desc{};

    // Create reference
    SimpleTensor<float> ref_t_input{ t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
    SimpleTensor<float> ref_t_weight{ t_weight_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
    SimpleTensor<float> ref_t_bias_placeholder{ t_dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
    SimpleTensor<float> ref_t_l1_addend{ t_dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };

    // Fill reference
    fill<float>(ref_t_input, 0, library.get());
    fill<float>(ref_t_weight, 1, library.get());
    fill<float>(ref_t_l1_addend, 2, library.get());

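    // The reference implementation works in NCHW, so permute the NHWC inputs (and the
    // destination shape) into NCHW before computing the reference output.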
    auto ref_t_input_nchw = reference::permute(ref_t_input, PermutationVector(1U, 2U, 0U));
    auto ref_t_weight_nchw = reference::permute(ref_t_weight, PermutationVector(1U, 2U, 0U));
    auto ref_t_bias_placeholder_nchw = reference::permute(ref_t_bias_placeholder, PermutationVector(1U, 2U, 0U));
    auto ref_t_l1_addend_nchw = reference::permute(ref_t_l1_addend, PermutationVector(1U, 2U, 0U));
    auto t_dst_shape_nchw = t_dst_shape;
    permute(t_dst_shape_nchw, PermutationVector(1U, 2U, 0U));

    PadStrideInfo legacy_pad_stride(conv2d_desc.stride.x(), conv2d_desc.stride.y(), conv2d_desc.pad.left, conv2d_desc.pad.right, conv2d_desc.pad.top, conv2d_desc.pad.bottom, DimensionRoundingType{});
    auto ref_t_dst_nchw = reference::arithmetic_operation(
        ArithmeticOperation::ADD,
        ref_t_l1_addend_nchw,
        reference::convolution_layer(ref_t_input_nchw, ref_t_weight_nchw, ref_t_bias_placeholder_nchw, t_dst_shape_nchw, legacy_pad_stride, conv2d_desc.dilation),
        data_type,
        ConvertPolicy{});
    const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U));

    CLScheduler::get().default_reinit();
    const auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
    OperatorGraph op_graph;

    const auto op_t_input = add_tensor(op_graph, t_input_info);
    const auto op_t_weight = add_tensor(op_graph, t_weight_info);
    const auto op_t_l1_addend = add_tensor(op_graph, t_l1_addend_info);
    const auto op_t_acc = add_tensor(op_graph, t_acc_info); // temp accumulator; TensorInfo to be inferred
    const auto op_t_dst = add_tensor(op_graph, t_dst_info);

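    // Describe the fused computation: conv2d writes the intermediate op_t_acc, which the
    // elementwise add then combines with the addend to produce op_t_dst.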
    auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_acc);
    force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT); // Testing-only hook: pin the conv2d to the direct convolution method
    add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);

    const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
    ClWorkload workload;
    build(workload, op_graph, workload_ctx);

    ClCompositeOperator op;
    op.configure(cl_compile_ctx, workload);

    // Construct tensors
    CLTensor t_input{};
    CLTensor t_weight{};
    CLTensor t_l1_addend{};
    CLTensor t_dst{};

    // Init tensors
    t_input.allocator()->init(t_input_info);
    t_weight.allocator()->init(t_weight_info);
    t_l1_addend.allocator()->init(t_dst_info);
    t_dst.allocator()->init(t_dst_info);

    // Allocate and fill tensors
    t_input.allocator()->allocate();
    t_weight.allocator()->allocate();
    t_l1_addend.allocator()->allocate();
    t_dst.allocator()->allocate();
    fill<float>(CLAccessor(t_input), 0, library.get());
    fill<float>(CLAccessor(t_weight), 1, library.get());
    fill<float>(CLAccessor(t_l1_addend), 2, library.get());
    // "Pack" tensors
    OpTensorBinding bp_tensors({ { op_t_input, &t_input },
                                 { op_t_weight, &t_weight },
                                 { op_t_l1_addend, &t_l1_addend },
                                 { op_t_dst, &t_dst }
                               });

    // Populate prepare and run pack-maps (including allocating aux tensors)
    ClAuxTensorData aux_tensor_data{};
    TensorPackMap prepare_pack_map{};
    TensorPackMap run_pack_map{};
    bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors);

    op.prepare(prepare_pack_map);
    op.run(run_pack_map);
    RelativeTolerance<float> tolerance_f32(0.001f); // Tolerance for comparing the reference's output against the implementation's output for floating-point data types
    validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32);
}
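
// The remaining cases are negative tests: each builds an OperatorGraph that the dynamic
// fusion prototype must reject, so both build() and validate() are expected to fail.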
TEST_SUITE(Unsupported)
TEST_CASE(DataType_QASYMM8, framework::DatasetMode::ALL)
{
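    // Quantized data types are not supported: building the same conv2d + add graph in
    // QASYMM8 is expected to fail.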
    const auto data_type = DataType::QASYMM8;
    const auto data_layout = DataLayout::NHWC;
    const auto t_input_shape = TensorShape(384, 12, 12);
    const auto t_weight_shape = TensorShape(384, 1, 1, 64);
    const auto t_dst_shape = TensorShape(64, 12, 12);
    auto t_input_info = TensorInfo(t_input_shape, 1, data_type, data_layout);
    auto t_weight_info = TensorInfo(t_weight_shape, 1, data_type, data_layout);
    auto t_l1_addend_info = TensorInfo(t_dst_shape, 1, data_type, data_layout);
    auto t_acc_info = TensorInfo(t_dst_shape, 1, data_type, data_layout);
    auto t_dst_info = TensorInfo(t_dst_shape, 1, data_type, data_layout);

    Conv2dDescriptor conv2d_desc{};
    ElementwiseDescriptor add_desc{};

    OperatorGraph op_graph;

    const auto op_t_input = add_tensor(op_graph, t_input_info);
    const auto op_t_weight = add_tensor(op_graph, t_weight_info);
    const auto op_t_l1_addend = add_tensor(op_graph, t_l1_addend_info);
    const auto op_t_acc = add_tensor(op_graph, t_acc_info); // temp accumulator; TensorInfo to be inferred
    const auto op_t_dst = add_tensor(op_graph, t_dst_info);

    auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_acc);
    add_op_elementwise_op(op_graph, add_desc, op_t_acc, op_t_l1_addend, op_t_dst);
    force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT);

    const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
    ClWorkload workload;
    const auto success = build(workload, op_graph, workload_ctx);

    ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS);
}
TEST_CASE(DataLayout_NCHW, framework::DatasetMode::ALL)
{
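    // Only NHWC is supported; a conv2d graph described in NCHW is expected to be rejected.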
    const auto data_type = DataType::F32;
    const auto data_layout = DataLayout::NCHW;
    const auto t_input_shape = TensorShape(384, 12, 12);
    const auto t_weight_shape = TensorShape(384, 1, 1, 64);
    const auto t_dst_shape = TensorShape(64, 12, 12);
    auto t_input_info = TensorInfo(t_input_shape, 1, data_type, data_layout);
    auto t_weight_info = TensorInfo(t_weight_shape, 1, data_type, data_layout);
    auto t_dst_info = TensorInfo(t_dst_shape, 1, data_type, data_layout);

    Conv2dDescriptor conv2d_desc{};

    OperatorGraph op_graph;

    const auto op_t_input = add_tensor(op_graph, t_input_info);
    const auto op_t_weight = add_tensor(op_graph, t_weight_info);
    const auto op_t_dst = add_tensor(op_graph, t_dst_info);

    auto conv2d = add_op_conv2d(op_graph, conv2d_desc, op_t_input, op_t_weight, op_t_dst);
    force_conv2d_method(op_graph, conv2d, ConvolutionMethod::DIRECT);

    const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
    ClWorkload workload;
    const auto success = build(workload, op_graph, workload_ctx);

    ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS);
}
TEST_SUITE_END() // Unsupported

TEST_SUITE(Invalid)
TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL)
{
    /* Computation:
     * out = conv2d(conv2d(l0_input, l0_weight), l1_weight)
     */
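    // Fusing more than one complex operator (here, two convolutions) into a single
    // workload is not supported, so building this graph is expected to fail.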
    const auto data_type = DataType::F32;
    const auto data_layout = DataLayout::NHWC;
    const auto t_l0_input_shape = TensorShape(1024, 56, 56);
    const auto t_l0_weight_shape = TensorShape(512, 1024, 1, 1);
    const auto t_l1_weight_shape = TensorShape(512, 256, 1, 1);

    auto t_l0_input_info = TensorInfo(t_l0_input_shape, 1, data_type, data_layout);
    auto t_l0_weight_info = TensorInfo(t_l0_weight_shape, 1, data_type, data_layout);
    auto t_l1_weight_info = TensorInfo(t_l1_weight_shape, 1, data_type, data_layout);
    auto t_l0_dst_info = TensorInfo();
    auto t_dst_info = TensorInfo();

    OperatorGraph op_graph;
    const auto conv2d_desc = Conv2dDescriptor{};

    const auto op_t_l0_input = add_tensor(op_graph, t_l0_input_info);
    const auto op_t_l0_weight = add_tensor(op_graph, t_l0_weight_info);
    const auto op_t_l1_weight = add_tensor(op_graph, t_l1_weight_info);
    const auto op_t_l0_dst = add_tensor(op_graph, t_l0_dst_info); // temp accumulator; TensorInfo to be inferred
    const auto op_t_dst = add_tensor(op_graph, t_dst_info);

    add_op_conv2d(op_graph, conv2d_desc, op_t_l0_input, op_t_l0_weight, op_t_l0_dst);
    add_op_conv2d(op_graph, conv2d_desc, op_t_l0_dst, op_t_l1_weight, op_t_dst);

    const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
    ClWorkload workload;
    const auto success = build(workload, op_graph, workload_ctx);

    ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS);
}
TEST_CASE(Enlarging_Execution_Space, framework::DatasetMode::ALL)
{
    /* Computation:
     * out = add(l2_lhs, add(add(l0_lhs, l0_rhs), l1_rhs))
     */
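    // The final add broadcasts against the larger-shaped l2_lhs, so the execution space
    // would have to grow partway through the fused workload; such graphs are expected
    // to be rejected.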
    const auto data_type = DataType::F32;
    const auto data_layout = DataLayout::NHWC;
    const auto t_l0_lhs_shape = TensorShape(1, 256, 3);
    const auto t_l0_rhs_shape = TensorShape(1, 256, 3);
    const auto t_l1_rhs_shape = TensorShape(1, 1, 3);
    const auto t_l2_lhs_shape = TensorShape(1024, 1, 3);

    auto t_l0_lhs_info = TensorInfo(t_l0_lhs_shape, 1, data_type, data_layout);
    auto t_l0_rhs_info = TensorInfo(t_l0_rhs_shape, 1, data_type, data_layout);
    auto t_l1_rhs_info = TensorInfo(t_l1_rhs_shape, 1, data_type, data_layout);
    auto t_l2_lhs_info = TensorInfo(t_l2_lhs_shape, 1, data_type, data_layout);
    auto t_l0_dst_info = TensorInfo();
    auto t_l1_dst_info = TensorInfo();
    auto t_dst_info = TensorInfo();

    OperatorGraph op_graph;
    const auto add_desc = ElementwiseDescriptor{};

    const auto op_t_l0_lhs = add_tensor(op_graph, t_l0_lhs_info);
    const auto op_t_l0_rhs = add_tensor(op_graph, t_l0_rhs_info);
    const auto op_t_l1_rhs = add_tensor(op_graph, t_l1_rhs_info);
    const auto op_t_l2_lhs = add_tensor(op_graph, t_l2_lhs_info);
    const auto op_t_l0_dst = add_tensor(op_graph, t_l0_dst_info); // temp accumulator; TensorInfo to be inferred
    const auto op_t_l1_dst = add_tensor(op_graph, t_l1_dst_info); // temp accumulator; TensorInfo to be inferred
    const auto op_t_dst = add_tensor(op_graph, t_dst_info);

    add_op_elementwise_op(op_graph, add_desc, op_t_l0_lhs, op_t_l0_rhs, op_t_l0_dst);
    add_op_elementwise_op(op_graph, add_desc, op_t_l0_dst, op_t_l1_rhs, op_t_l1_dst);
    add_op_elementwise_op(op_graph, add_desc, op_t_l1_dst, op_t_l2_lhs, op_t_dst);

    const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
    ClWorkload workload;
    const auto success = build(workload, op_graph, workload_ctx);

    ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS);
}
TEST_CASE(Root_Simple_And_Complex, framework::DatasetMode::ALL)
{
    /* Computation:
     * out = add(conv(l0_0_input, l0_0_weight), add(l0_1_lhs, l0_1_rhs))
     */
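    // The graph has two root operators, one complex (conv2d) and one simple (add), whose
    // outputs only meet at the final add; this topology is expected to be rejected.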
    const auto data_type = DataType::F32;
    const auto data_layout = DataLayout::NHWC;

    const auto t_l0_0_input_shape = TensorShape(128, 21, 21);
    const auto t_l0_0_weight_shape = TensorShape(144, 128, 1, 1);
    const auto t_l0_1_lhs_shape = TensorShape(144, 21, 21);
    const auto t_l0_1_rhs_shape = TensorShape(1, 1, 21);

    auto t_l0_0_input_info = TensorInfo(t_l0_0_input_shape, 1, data_type, data_layout);
    auto t_l0_0_weight_info = TensorInfo(t_l0_0_weight_shape, 1, data_type, data_layout);
    auto t_l0_1_lhs_info = TensorInfo(t_l0_1_lhs_shape, 1, data_type, data_layout);
    auto t_l0_1_rhs_info = TensorInfo(t_l0_1_rhs_shape, 1, data_type, data_layout);
    auto t_l0_0_dst_info = TensorInfo();
    auto t_l0_1_dst_info = TensorInfo();
    auto t_dst_info = TensorInfo();

    OperatorGraph op_graph;
    const auto conv2d_desc = Conv2dDescriptor{};
    const auto add_desc = ElementwiseDescriptor{};

    const auto op_t_l0_0_input = add_tensor(op_graph, t_l0_0_input_info);
    const auto op_t_l0_0_weight = add_tensor(op_graph, t_l0_0_weight_info);
    const auto op_t_l0_1_lhs = add_tensor(op_graph, t_l0_1_lhs_info);
    const auto op_t_l0_1_rhs = add_tensor(op_graph, t_l0_1_rhs_info);
    const auto op_t_l0_0_dst = add_tensor(op_graph, t_l0_0_dst_info); // temp accumulator; TensorInfo to be inferred
    const auto op_t_l0_1_dst = add_tensor(op_graph, t_l0_1_dst_info); // temp accumulator; TensorInfo to be inferred
    const auto op_t_dst = add_tensor(op_graph, t_dst_info);

    add_op_conv2d(op_graph, conv2d_desc, op_t_l0_0_input, op_t_l0_0_weight, op_t_l0_0_dst);
    add_op_elementwise_op(op_graph, add_desc, op_t_l0_1_lhs, op_t_l0_1_rhs, op_t_l0_1_dst);
    add_op_elementwise_op(op_graph, add_desc, op_t_l0_0_dst, op_t_l0_1_dst, op_t_dst);

    const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
    ClWorkload workload;
    const auto success = build(workload, op_graph, workload_ctx);

    ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS);
}
TEST_CASE(Loop, framework::DatasetMode::ALL)
{
    /* Computation:
     * tensor state0;
     * state1 = add(l0_lhs, state0)
     * state0 = add(l1_lhs, state1)
     */
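    // op_t_state0 and op_t_state1 form a cycle (state0 -> state1 -> state0); the
    // OperatorGraph must be acyclic, so the build is expected to fail.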
    const auto data_type = DataType::F32;
    const auto data_layout = DataLayout::NHWC;

    const auto t_shape = TensorShape(13, 21);

    auto t_l0_lhs_info = TensorInfo(t_shape, 1, data_type, data_layout);
    auto t_l1_lhs_info = TensorInfo(t_shape, 1, data_type, data_layout);
    auto state0_info = TensorInfo(t_shape, 1, data_type, data_layout);
    auto state1_info = TensorInfo();

    OperatorGraph op_graph;
    const auto conv2d_desc = Conv2dDescriptor{};
    const auto add_desc = ElementwiseDescriptor{};

    const auto op_t_l0_lhs = add_tensor(op_graph, t_l0_lhs_info);
    const auto op_t_l1_lhs = add_tensor(op_graph, t_l1_lhs_info);
    const auto op_t_state0 = add_tensor(op_graph, state0_info);
    const auto op_t_state1 = add_tensor(op_graph, state1_info);

    add_op_conv2d(op_graph, conv2d_desc, op_t_l0_lhs, op_t_state0, op_t_state1);
    add_op_elementwise_op(op_graph, add_desc, op_t_l1_lhs, op_t_state1, op_t_state0);

    const ClWorkloadContext workload_ctx{ GpuInfo{ CLScheduler::get().target() } };
    ClWorkload workload;
    const auto success = build(workload, op_graph, workload_ctx);

    ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(!bool(ClCompositeOperator::validate(workload)), framework::LogLevel::ERRORS);
}
TEST_SUITE_END() // Invalid

TEST_SUITE_END() // DYNAMIC_FUSION
TEST_SUITE_END() // INTEGRATION
TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */