25 #ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION 45 #ifdef ARM_COMPUTE_ASSERTS_ENABLED 69 const auto t_input_shape =
TensorShape(384, 12, 12);
72 const auto t_weight_shape =
TensorShape(384, 1, 1, 16);
90 fill<float>(ref_t_input, 0,
library.get());
91 fill<float>(ref_t_weight, 1,
library.get());
92 fill<float>(ref_t_l1_addend, 2,
library.get());
98 auto t_dst_shape_nchw = t_dst_shape;
104 ref_t_l1_addend_nchw,
114 const auto op_t_input =
add_tensor(op_graph, t_input_info);
115 const auto op_t_weight =
add_tensor(op_graph, t_weight_info);
116 const auto op_t_l1_addend =
add_tensor(op_graph, t_l1_addend_info);
117 const auto op_t_acc =
add_tensor(op_graph, t_acc_info);
139 t_weight.allocator()->init(t_weight_info);
144 t_input.allocator()->allocate();
145 t_weight.allocator()->allocate();
146 t_l1_addend.allocator()->allocate();
147 t_dst.allocator()->allocate();
153 { op_t_weight, &t_weight },
154 { op_t_l1_addend, &t_l1_addend },
162 bind_tensors(aux_tensor_data, prepare_pack_map, run_pack_map, workload, bp_tensors);
165 op.
run(run_pack_map);
174 const auto t_input_shape =
TensorShape(384, 12, 12);
175 const auto t_weight_shape =
TensorShape(384, 1, 1, 64);
188 const auto op_t_input =
add_tensor(op_graph, t_input_info);
189 const auto op_t_weight =
add_tensor(op_graph, t_weight_info);
190 const auto op_t_l1_addend =
add_tensor(op_graph, t_l1_addend_info);
191 const auto op_t_acc =
add_tensor(op_graph, t_acc_info);
209 const auto t_input_shape =
TensorShape(384, 12, 12);
210 const auto t_weight_shape =
TensorShape(384, 1, 1, 64);
220 const auto op_t_input =
add_tensor(op_graph, t_input_info);
221 const auto op_t_weight =
add_tensor(op_graph, t_weight_info);
236 TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL)
272 TEST_CASE(Enlarging_Execution_Space, framework::DatasetMode::ALL)
279 const auto t_l0_lhs_shape =
TensorShape(1, 256, 3);
280 const auto t_l0_rhs_shape =
TensorShape(1, 256, 3);
282 const auto t_l2_lhs_shape =
TensorShape(1024, 1, 3);
295 const auto op_t_l0_lhs =
add_tensor(op_graph, t_l0_lhs_info);
296 const auto op_t_l0_rhs =
add_tensor(op_graph, t_l0_rhs_info);
297 const auto op_t_l1_rhs =
add_tensor(op_graph, t_l1_rhs_info);
298 const auto op_t_l2_lhs =
add_tensor(op_graph, t_l2_lhs_info);
300 const auto op_t_l1_dst =
add_tensor(op_graph, t_l1_dst_info);
314 TEST_CASE(Root_Simple_And_Complex, framework::DatasetMode::ALL)
322 const auto t_l0_0_input_shape =
TensorShape(128, 21, 21);
323 const auto t_l0_0_weight_shape =
TensorShape(144, 128, 1, 1);
324 const auto t_l0_1_lhs_shape =
TensorShape(144, 21, 21);
325 const auto t_l0_1_rhs_shape =
TensorShape(1, 1, 21);
339 const auto op_t_l0_0_input =
add_tensor(op_graph, t_l0_0_input_info);
340 const auto op_t_l0_0_weight =
add_tensor(op_graph, t_l0_0_weight_info);
341 const auto op_t_l0_1_lhs =
add_tensor(op_graph, t_l0_1_lhs_info);
342 const auto op_t_l0_1_rhs =
add_tensor(op_graph, t_l0_1_rhs_info);
343 const auto op_t_l0_0_dst =
add_tensor(op_graph, t_l0_0_dst_info);
344 const auto op_t_l0_1_dst =
add_tensor(op_graph, t_l0_1_dst_info);
379 const auto op_t_l0_lhs =
add_tensor(op_graph, t_l0_lhs_info);
380 const auto op_t_l1_lhs =
add_tensor(op_graph, t_l1_lhs_info);
381 const auto op_t_state0 =
add_tensor(op_graph, state0_info);
382 const auto op_t_state1 =
add_tensor(op_graph, state1_info);
Status validate(const OperatorGraph &op_graph)
Return the validity of op_graph, usually after performing an operation (e.g.
void default_reinit(ICLTuner *cl_tuner=nullptr, CLGEMMHeuristicsHandle *gemm_h=nullptr, CLBackendType cl_backend_type=CLBackendType::Native)
Re-initializes the context and command queue used by the scheduler to default values and sets a defau...
RelativeTolerance< float > tolerance_f32(0.001f)
F32 Tolerance value for comparing reference's output against implementation's output for floating poi...
Operator add_op_elementwise_add(OperatorGraph &graph, const AddDescriptor &desc, OpTensor lhs, OpTensor rhs, OpTensor dst)
Add op Add to graph, and optionally describes fusion through passing of intermediate OpTensor s...
static CLScheduler & get()
Access the scheduler singleton.
GPUTarget target() const
Get the target GPU.
Context (device capabilities, platform details) associated with a ClWorkload.
1 channel, 1 F32 per channel
Graph of operators to execute within a Workload.
DimensionRoundingType
Dimension rounding type when down-scaling on CNNs.
Strides PermutationVector
Permutation vector.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context...
const auto t_l1_weight_shape
CLCompileContext & get_compile_context()
Gets the compile context used.
void force_conv2d_method(OperatorGraph &graph, Operator conv2d, ConvolutionMethod method)
(Only for Debugging and Testing) Force a conv2d method
SimpleTensor< T > arithmetic_operation(ArithmeticOperation op, const SimpleTensor< T > &src1, const SimpleTensor< T > &src2, SimpleTensor< T > &dst, ConvertPolicy convert_policy)
const auto t_l0_weight_shape
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
Copyright (c) 2017-2022 Arm Limited.
#define TEST_SUITE(SUITE_NAME)
#define TEST_CASE(TEST_NAME, MODE)
void permute(Dimensions< T > &dimensions, const PermutationVector &perm)
Permutes given Dimensions according to a permutation vector.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
Quantization information.
DatasetMode
Possible dataset modes.
void run(TensorPackMap &tensor_pack_map)
Enqueue run workloads.
std::unique_ptr< AssetsLibrary > library
quantized, asymmetric fixed-point 8-bit number unsigned
Accessor implementation for CLTensor objects.
const ClWorkloadContext workload_ctx
Padding and stride information class.
const auto op_t_l0_weight
Descriptor for Addition operation.
Num samples, channels, height, width.
Descriptor for Conv2dDescriptor operation.
Simple tensor object that stores elements in a consecutive chunk of memory.
Status build(ClWorkload &workload, const OperatorGraph &op_graph, const ClWorkloadContext &ctx)
Build a ClWorkload from an OperatorGraph.
GPU information for ClWorkloadContext.
SimpleTensor< T > convolution_layer(const SimpleTensor< T > &src, const SimpleTensor< TW > &weights, const SimpleTensor< TB > &bias, const TensorShape &output_shape, const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info)
void prepare(TensorPackMap &tensor_pack_map)
Enqueue prepare workloads.
OpTensor add_tensor(OperatorGraph &graph, ITensorInfo &info)
Associate a TensorInfo with a newly created OpTensor in the graph.
Class representing a relative tolerance value.
std::map< OpTensor, ITensor * > OpTensorBinding
Map OpTensor handles to their corresponding ITensor memory.
Num samples, height, width, channels.
void configure(const CLCompileContext &ctx, const ClWorkload &workload)
Configures a ClCompositeOperator with a ClWorkload This includes the compilation of Cl kernels inside...
Store the tensor's metadata.
#define ARM_COMPUTE_EXPECT(X, LEVEL)
Holder of any auxiliary CLTensors required by a ClWorkload.
static Status validate(const ClWorkload &workload)
Validate ClWorkload workload.
Map a kernel (as identified by its unit workload id) to its corresponding tensor pack.
const auto t_l0_input_shape
const auto op_t_l1_weight
ConvertPolicy
Policy to handle integer overflow.
Operator add_op_conv2d(OperatorGraph &graph, const Conv2dDescriptor &desc, OpTensor input, OpTensor weights, OpTensor bias, OpTensor dst)
Add op Conv2d to graph.
Operator runtime to run a ClWorkload.
Status bind_tensors(ClAuxTensorData &aux_tensor_data, TensorPackMap &prepare_pack_map, TensorPackMap &run_pack_map, const ClWorkload &workload, const OpTensorBinding &op_tensors)
Bind tensor memory to packs used by prepare and run methods.
Basic implementation of the OpenCL tensor interface.