14 #if defined(ARMCOMPUTENEON_ENABLED) 18 #include <doctest/doctest.h> 22 #include <arm_compute/core/CL/CLKernelLibrary.h> 23 #include <CL/cl_ext.h> 24 #include <arm_compute/runtime/CL/CLScheduler.h> 32 SampleClBackendCustomAllocator() =
default;
// Allocates host memory suitable for import by the CL backend.
// NOTE(review): extraction fragment — the enclosing class braces, `override`,
// and the std::align()-failure branch (orig. lines 45-47) are not visible here.
34 void*
allocate(
size_t size,
size_t alignment)
// The caller-supplied alignment is deliberately overridden with the OpenCL
// device's global-memory cache-line size, per CL buffer-import requirements.
39 alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
// Over-allocate by two alignments so std::align can always carve out an
// aligned sub-region of `size` bytes.
41 size_t space = size + alignment + alignment;
// NOTE(review): `space` is already a byte count; multiplying it by
// sizeof(size_t) over-allocates by 8x on LP64 — TODO confirm intent.
42 auto allocatedMemPtr = std::malloc(space *
sizeof(
size_t));
// std::align returns nullptr when no aligned region of `size` bytes fits in
// `space`; the handling of that case lies outside this fragment.
44 if (std::align(alignment, size, allocatedMemPtr, space) ==
nullptr)
// On success std::align has advanced allocatedMemPtr to the aligned address.
48 return allocatedMemPtr;
// Builds a minimal test network: input -> fully-connected (single 1.0f
// weight) -> output, then sets the tensor infos on the connected slots.
// NOTE(review): extraction fragment — the INetwork creation, the input/output
// layer creation, and the input-side Connect() call are not visible here.
65 using namespace armnn;
// Single scalar weight of 1.0f so the layer acts as a pass-through scale.
69 float weightsData[] = {1.0f};
75 IConnectableLayer* fullyConnected = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
// Wire the fully-connected output into the network's output layer.
83 fullyConnected->GetOutputSlot(0).Connect(OutputLayer->
GetInputSlot(0));
// Tensor infos must be set on output slots for the optimizer to validate.
87 InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
90 fullyConnected->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
// End-to-end test: run the one-layer fully-connected network on GpuAcc with
// input/output tensors backed by memory from the custom allocator, imported
// (not copied) by the runtime.
// NOTE(review): extraction fragment — runtime/options setup, `numElements`,
// `number`, and the inputTensors/outputTensors container declarations are
// not visible here.
101 TEST_CASE(
"ClCustomAllocatorTest")
103 using namespace armnn;
116 auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
// Import pre-allocated host memory (MemorySource::Malloc) for both inputs
// and outputs instead of having the runtime copy tensor data.
127 std::string ignoredErrorMessage;
128 INetworkProperties networkProperties(
false, MemorySource::Malloc, MemorySource::Malloc);
129 run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
133 size_t totalBytes = numElements *
sizeof(float);
// Use the device's cache-line size so the imported buffers satisfy the CL
// alignment requirement (mirrors the allocator's own choice).
135 const size_t alignment =
136 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
// Allocate through the registered GpuAcc custom allocator.
138 void* alignedInputPtr = options.
m_CustomAllocatorMap[
"GpuAcc"]->allocate(totalBytes, alignment);
141 auto* inputPtr =
reinterpret_cast<float*
>(alignedInputPtr);
142 std::fill_n(inputPtr, numElements, number);
144 void* alignedOutputPtr = options.
m_CustomAllocatorMap[
"GpuAcc"]->allocate(totalBytes, alignment);
// Pre-fill the output with a sentinel (-10.0f) so a silently-skipped run
// would be detected by the final CHECK.
145 auto* outputPtr =
reinterpret_cast<float*
>(alignedOutputPtr);
146 std::fill_n(outputPtr, numElements, -10.0f);
// Bind the imported buffers directly to the network's I/O binding points.
150 {0,
armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), alignedInputPtr)},
154 {0,
armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
158 run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
159 run->UnloadNetwork(networkIdentifier);
// Make sure the CL command queue has drained before reading results back.
163 arm_compute::CLScheduler::get().sync();
164 auto* outputResult =
reinterpret_cast<float*
>(alignedOutputPtr);
// NOTE(review): UnloadNetwork is called a second time here (also at orig.
// line 159 above) — TODO confirm the duplicate call is intentional.
166 run->UnloadNetwork(networkIdentifier);
167 CHECK(outputResult[0] == number);
// NOTE(review): alignedInputPtr/alignedOutputPtr are never released through
// the allocator's free() — test-scope leak, TODO confirm.
169 backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
// Negative test (built only when the NEON backend is enabled): with a custom
// allocator registered, optimizing the network for CpuAcc must throw because
// no preferred backend is available.
// NOTE(review): extraction fragment — runtime/options setup and the
// exception-type argument of CHECK_THROWS_AS_MESSAGE (orig. line 194) are
// not visible here.
173 #if defined(ARMCOMPUTENEON_ENABLED) 175 TEST_CASE(
"ClCustomAllocatorCpuAccNegativeTest")
177 using namespace armnn;
181 auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
191 std::vector<std::string> errMessages;
// Optimize() is expected to throw since GetAvailablePreferredBackends()
// comes back empty for CpuAcc in this configuration.
193 CHECK_THROWS_AS_MESSAGE(
Optimize(*myNetwork, {
"CpuAcc"}, run->GetDeviceSpec(), optOptions, errMessages),
195 "Expected an exception as GetAvailablePreferredBackends() should be empty in Optimize().");
// Clean up the allocator registration so later tests start from a
// clean backend registry.
198 backendRegistry.DeregisterAllocator(NeonBackend::GetIdStatic());
// Negative test: creating the runtime when a nullptr allocator has been
// registered for GpuAcc must throw from the RuntimeImpl constructor.
// NOTE(review): extraction fragment — the lines that register the nullptr
// allocator into `options`, and the exception-type argument of
// CHECK_THROWS_AS_MESSAGE (orig. line 213), are not visible here.
203 TEST_CASE(
"ClCustomAllocatorGpuAccNullptrTest")
205 using namespace armnn;
// customAllocator is created but presumably NOT the one registered —
// a nullptr is; verify against the missing setup lines.
209 auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
212 CHECK_THROWS_AS_MESSAGE(
IRuntimePtr run = IRuntime::Create(options),
214 "Expected exception in RuntimeImpl::RuntimeImpl() as allocator was nullptr.");
Custom Allocator interface.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
virtual void free(void *ptr)=0
Interface to be implemented by the child class to free the allocated bytes.
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
int Connect(InputSlot &destination)
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
BackendRegistry & BackendRegistryInstance()
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Copyright (c) 2021 ARM Limited and Contributors.
TEST_SUITE("ClCustomAllocatorTests")
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
A layer user-provided data can be bound to (e.g. inputs, outputs).
#define ARMNN_NO_DEPRECATE_WARN_END
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
A FullyConnectedDescriptor for the FullyConnectedLayer.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
virtual armnn::MemorySource GetMemorySourceType()=0
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
std::map< BackendId, std::shared_ptr< ICustomAllocator > > m_CustomAllocatorMap
A map to define a custom memory allocator for specific backend Ids.
EmptyOptional is used to initialize the Optional class in case we want to have default value for an Optional.
Base class for all ArmNN exceptions so that users can filter to just those.
MemorySource
Define the Memory Source to reduce copies.
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
armnn::INetworkPtr CreateTestNetwork(armnn::TensorInfo &inputTensorInfo)
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
virtual void * allocate(size_t size, size_t alignment)=0
Interface to be implemented by the child class to allocate bytes.
unsigned int GetNumElements() const