53 cl_mem import_malloc_memory_helper(
void *ptr,
size_t size)
55 const cl_import_properties_arm import_properties[] =
58 CL_IMPORT_TYPE_HOST_ARM,
62 cl_int err = CL_SUCCESS;
69 class DummyAllocator final :
public IAllocator
72 DummyAllocator() =
default;
74 void *allocate(
size_t size,
size_t alignment)
override 77 return _backend_allocator.allocate(size, alignment);
79 void free(
void *ptr)
override 81 return _backend_allocator.free(ptr);
83 std::unique_ptr<IMemoryRegion> make_region(
size_t size,
size_t alignment)
override 87 return _backend_allocator.make_region(size, alignment);
89 int get_n_calls()
const 96 CLBufferAllocator _backend_allocator{};
99 void run_conv2d(std::shared_ptr<IMemoryManager> mm, IAllocator &mm_allocator)
102 CLTensor
src, weights, bias,
dst;
109 CLGEMMConvolutionLayer conv(mm);
110 conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U));
113 src.allocator()->allocate();
114 weights.allocator()->allocate();
115 bias.allocator()->allocate();
116 dst.allocator()->allocate();
121 mm->populate(mm_allocator, 1 );
137 DummyAllocator global_tensor_alloc;
141 run_conv2d(
nullptr , global_tensor_alloc);
153 auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
154 auto pool_mgr = std::make_shared<PoolManager>();
155 auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
157 DummyAllocator global_tensor_alloc;
161 run_conv2d(mm, global_tensor_alloc);
178 auto buf = cl::Buffer(
CLScheduler::get().context(), CL_MEM_READ_WRITE, total_size);
189 t2.allocator()->set_associated_memory_group(&mg);
196 t3.allocator()->init(info_neg);
206 t4.allocator()->free();
232 act_func.
configure(&tensor,
nullptr, act_info);
238 size_t space = total_size_in_bytes + alignment;
239 auto raw_data = std::make_unique<uint8_t[]>(space);
241 void *aligned_ptr = raw_data.get();
242 std::align(alignment, total_size_in_bytes, aligned_ptr, space);
244 cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
249 std::uniform_real_distribution<float>
distribution(-5.f, 5.f);
250 std::mt19937 gen(
library->seed());
251 auto *typed_ptr =
reinterpret_cast<float *
>(aligned_ptr);
252 for(
unsigned int i = 0; i < total_size_in_elems; ++i)
262 for(
unsigned int i = 0; i < total_size_in_elems; ++i)
273 #if !defined(BARE_METAL) 295 act_func.
configure(&tensor,
nullptr, act_info);
302 std::ofstream output_file(
"test_mmap_import.bin", std::ios::binary | std::ios::out);
303 output_file.seekp(total_size_in_bytes - 1);
304 output_file.write(
"", 1);
310 unsigned char *data = mmapped_file.
data();
312 cl::Buffer wrapped_buffer(import_malloc_memory_helper(data, total_size_in_bytes));
317 std::uniform_real_distribution<float>
distribution(-5.f, 5.f);
318 std::mt19937 gen(
library->seed());
319 auto *typed_ptr =
reinterpret_cast<float *
>(data);
320 for(
unsigned int i = 0; i < total_size_in_elems; ++i)
330 for(
unsigned int i = 0; i < total_size_in_elems; ++i)
340 #endif // !defined(BARE_METAL) 347 const std::vector<float>
scale = { 0.25f, 1.4f, 3.2f, 2.3f, 4.7f };
371 auto cl_scale_buffer = quantization.scale->cl_buffer();
372 void *mapped_ptr =
CLScheduler::get().
queue().enqueueMapBuffer(cl_scale_buffer, CL_TRUE, CL_MAP_READ, 0, scale.size());
373 auto cl_scale_ptr =
static_cast<float *
>(mapped_ptr);
374 for(
unsigned int i = 0; i < scale.size(); ++i)
const std::vector< int32_t > & offset() const
Offset vector accessor.
#define ARM_COMPUTE_ASSERT(cond)
TensorInfo * info() const override
Interface to be implemented by the child class to return the tensor's metadata.
bool is_resizable() const override
Flag indicating whether the size of the tensor can be changed.
static void set_global_allocator(IAllocator *allocator)
Sets global allocator that will be used by all CLTensor objects.
static CLScheduler & get()
Access the scheduler singleton.
QuantizationInfo quantization_info() const override
Get the quantization settings (scale and offset) of the tensor.
1 channel, 1 F32 per channel
ARM_COMPUTE_EXPECT(has_error==expected, framework::LogLevel::ERRORS)
Memory mapped file class.
static CLKernelLibrary & get()
Access the KernelLibrary singleton.
void run() override
Run the kernels contained in the function.
CLTensorAllocator * allocator()
Return a pointer to the tensor's allocator.
Basic function to run opencl::kernels::ClActivationKernel.
Activation Layer Information class.
OpenCL quantization data.
void init(const TensorInfo &input, size_t alignment=0)
Initialize a tensor based on the passed TensorInfo.
SimpleTensor< float > src
Copyright (c) 2017-2021 Arm Limited.
Status import_memory(cl::Buffer buffer)
Import an existing memory as a tensor's backing memory.
Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
Quantization information.
DatasetMode
Possible dataset modes.
std::unique_ptr< AssetsLibrary > library
TEST_SUITE_END() FIXTURE_DATA_TEST_CASE(RunSmall
[CLActivationLayer Test snippet]
CLQuantization quantization() const override
Interface to be implemented by the child class to return the wrapped quantization info data.
size_t total_size() const override
Returns the total size of the tensor in bytes.
size_t total_size() const
Collapses all dimensions to a single linear total size.
bool is_mapped() const
Checks whether the file is mapped.
const std::vector< float > & scale() const
Scale vector accessor.
TEST_SUITE(U8_to_S8) FIXTURE_DATA_TEST_CASE(RunSmall
std::uniform_real_distribution< float > distribution(-5.f, 5.f)
cl::CommandQueue & queue()
Accessor for the associated CL command queue.
unsigned char * data()
Mapped data accessor.
void sync()
Blocks until all commands in the associated command queue have finished.
quantized, symmetric per channel fixed-point 8-bit number
void allocate() override
Allocate size specified by TensorInfo of OpenCL memory.
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
bool device_supports_extension(const cl::Device &device, const char *extension_name)
Helper function to check whether a given extension is supported.
bool empty() const
Indicates whether this QuantizationInfo has valid settings or not.
Num samples, height, width, channels.
void free() override
Free allocated OpenCL memory.
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
Set the input and output tensor.
Store the tensor's metadata.
cl_mem clImportMemoryARM(cl_context context, cl_mem_flags flags, const cl_import_properties_arm *properties, void *memory, size_t size, cl_int *errcode_ret)
TEST_CASE(FusedActivation, framework::DatasetMode::ALL)
Validate fused activation expecting the following behaviours:
const TensorShape & tensor_shape() const override
Size for each dimension of the tensor.
DataType
Available data types.
const cl::Device & get_device()
Gets the CL device for which the programs are created.
Basic implementation of the OpenCL tensor interface.