// Compute Library 21.05 - TensorAllocator.cpp
/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/CLTensorAllocator.h"

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/utils/misc/MMappedFile.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/validation/Validation.h"

#include <fstream>
#include <memory>
#include <random>

namespace arm_compute
{
namespace test
{
namespace validation
{
namespace
{
/** Wrap an existing host allocation in a cl_mem object using the cl_arm_import_memory extension */
cl_mem import_malloc_memory_helper(void *ptr, size_t size)
{
    const cl_import_properties_arm import_properties[] =
    {
        CL_IMPORT_TYPE_ARM,
        CL_IMPORT_TYPE_HOST_ARM,
        0
    };

    cl_int err = CL_SUCCESS;
    cl_mem buf = clImportMemoryARM(CLKernelLibrary::get().context().get(), CL_MEM_READ_WRITE, import_properties, ptr, size, &err);
    ARM_COMPUTE_ASSERT(err == CL_SUCCESS);

    return buf;
}
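
// Note: the imported host allocation is not copied; it must stay alive and
// keep satisfying the device's alignment requirements for as long as the
// returned cl_mem is in use.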

class DummyAllocator final : public IAllocator
{
public:
    DummyAllocator() = default;

    void *allocate(size_t size, size_t alignment) override
    {
        ++_n_calls;
        return _backend_allocator.allocate(size, alignment);
    }
    void free(void *ptr) override
    {
        return _backend_allocator.free(ptr);
    }
    std::unique_ptr<IMemoryRegion> make_region(size_t size, size_t alignment) override
    {
        // Needs to be counted as well, as this is the entry point used internally by CLTensorAllocator
        ++_n_calls;
        return _backend_allocator.make_region(size, alignment);
    }
    int get_n_calls() const
    {
        return _n_calls;
    }

private:
    int               _n_calls{};
    CLBufferAllocator _backend_allocator{};
};
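
// DummyAllocator decorates CLBufferAllocator: real allocations are forwarded
// to the backend, while get_n_calls() lets the tests below verify that every
// internal allocation was routed through the user-provided allocator.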

void run_conv2d(std::shared_ptr<IMemoryManager> mm, IAllocator &mm_allocator)
{
    // Create tensors
    CLTensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
    weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32, DataLayout::NHWC));
    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32, DataLayout::NHWC));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));

    // Create and configure function
    CLGEMMConvolutionLayer conv(mm);
    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U));

    // Allocate tensors
    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    // Finalize memory manager
    if(mm != nullptr)
    {
        mm->populate(mm_allocator, 1 /* num_pools */);
        ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
        ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS);
    }

    conv.run();
}
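
// run_conv2d drives both configurations under test: with mm == nullptr all
// tensor memory is allocated directly (through the global allocator when one
// is set), while a non-null memory manager additionally routes the function's
// internal memory through the pool populated from mm_allocator.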
} // namespace

TEST_SUITE(CL)
TEST_SUITE(UNIT)
TEST_SUITE(TensorAllocator)

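// The cases below exercise CLTensorAllocator along three axes: pluggable
// global allocators, importing pre-existing memory (cl::Buffer, malloc'd
// memory, memory-mapped files) and per-channel quantization metadata.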
/* Validate that an external global allocator can be used for all internal allocations */
TEST_CASE(ExternalGlobalAllocator, framework::DatasetMode::ALL)
{
    DummyAllocator global_tensor_alloc;
    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);

    // Run a convolution
    run_conv2d(nullptr /* mm */, global_tensor_alloc);

    // Check that the external allocator was used for all allocations (more than 4 calls expected)
    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);

    // Nullify global allocator
    CLTensorAllocator::set_global_allocator(nullptr);
}

/* Validate that an external global allocator can be used for the pool manager */
TEST_CASE(ExternalGlobalAllocatorMemoryPool, framework::DatasetMode::ALL)
{
    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
    auto pool_mgr     = std::make_shared<PoolManager>();
    auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

    DummyAllocator global_tensor_alloc;
    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);

    // Run a convolution
    run_conv2d(mm, global_tensor_alloc);

    // Check that the external allocator was used for all allocations (more than 4 calls expected)
    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);

    // Nullify global allocator
    CLTensorAllocator::set_global_allocator(nullptr);
}

/** Validates import memory interface when importing cl buffer objects */
TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
{
    // Init tensor info
    const TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);

    // Allocate memory buffer
    const size_t total_size = info.total_size();
    auto         buf        = cl::Buffer(CLScheduler::get().context(), CL_MEM_READ_WRITE, total_size);

    // Negative case : Import empty buffer
    CLTensor t1;
    t1.allocator()->init(info);
    ARM_COMPUTE_ASSERT(!bool(t1.allocator()->import_memory(cl::Buffer())));
    ARM_COMPUTE_ASSERT(t1.info()->is_resizable());

    // Negative case : Import memory to a tensor that is memory managed
    CLTensor    t2;
    MemoryGroup mg;
    t2.allocator()->set_associated_memory_group(&mg);
    ARM_COMPUTE_ASSERT(!bool(t2.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(t2.info()->is_resizable());

    // Negative case : Buffer too small for the tensor's total size
    CLTensor         t3;
    const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32);
    t3.allocator()->init(info_neg);
    ARM_COMPUTE_ASSERT(!bool(t3.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(t3.info()->is_resizable());

    // Positive case : Import cl::Buffer of the correct size
    CLTensor t4;
    t4.allocator()->init(info);
    ARM_COMPUTE_ASSERT(bool(t4.allocator()->import_memory(buf)));
    ARM_COMPUTE_ASSERT(!t4.info()->is_resizable());
    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
    t4.allocator()->free();
    ARM_COMPUTE_ASSERT(t4.info()->is_resizable());
    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
}
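
// import_memory() only succeeds on a tensor that is not memory managed and
// whose TensorInfo fits inside the provided buffer; freeing the tensor drops
// the reference, but ownership of the imported cl::Buffer stays with the caller.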

/** Validates import memory interface when importing malloced memory */
TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
{
    // Check if import extension is supported
    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory_host"))
    {
        return;
    }
    else
    {
        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
        const DataType            data_type = DataType::F32;

        // Create tensor
        const TensorInfo info(shape, 1, data_type);
        CLTensor         tensor;
        tensor.allocator()->init(info);

        // Create and configure activation function
        CLActivationLayer act_func;
        act_func.configure(&tensor, nullptr, act_info);

        // Allocate and import tensor
        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
        const size_t total_size_in_bytes = tensor.info()->total_size();
        const size_t alignment           = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        size_t       space               = total_size_in_bytes + alignment;
        auto         raw_data            = std::make_unique<uint8_t[]>(space);

        void *aligned_ptr = raw_data.get();
        std::align(alignment, total_size_in_bytes, aligned_ptr, space);

        cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());

        // Fill tensor
        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
        std::mt19937                          gen(library->seed());
        auto                                 *typed_ptr = reinterpret_cast<float *>(aligned_ptr);
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            typed_ptr[i] = distribution(gen);
        }

        // Execute function and sync
        act_func.run();
        CLScheduler::get().sync();

        // Validate result by checking that the input has no negative values
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
        }

        // Release resources
        tensor.allocator()->free();
        ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
    }
}
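
// Over-allocating by `alignment` bytes and letting std::align pick the first
// suitably aligned address inside the block keeps the imported pointer valid
// on devices that require cacheline-aligned host memory.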

#if !defined(BARE_METAL)
/** Validates import memory interface when importing memory mapped objects */
TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
{
    // Check if import extension is supported
    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory_host"))
    {
        return;
    }
    else
    {
        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
        const DataType            data_type = DataType::F32;

        // Create tensor
        const TensorInfo info(shape, 1, data_type);
        CLTensor         tensor;
        tensor.allocator()->init(info);

        // Create and configure activation function
        CLActivationLayer act_func;
        act_func.configure(&tensor, nullptr, act_info);

        // Get number of elements
        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
        const size_t total_size_in_bytes = tensor.info()->total_size();

        // Create file of the required size
        std::ofstream output_file("test_mmap_import.bin", std::ios::binary | std::ios::out);
        output_file.seekp(total_size_in_bytes - 1);
        output_file.write("", 1);
        output_file.close();

        // Map file
        utils::mmap_io::MMappedFile mmapped_file("test_mmap_import.bin", 0 /* size 0: map the whole file */, 0);
        ARM_COMPUTE_ASSERT(mmapped_file.is_mapped());
        unsigned char *data = mmapped_file.data();

        cl::Buffer wrapped_buffer(import_malloc_memory_helper(data, total_size_in_bytes));
        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());

        // Fill tensor
        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
        std::mt19937                          gen(library->seed());
        auto                                 *typed_ptr = reinterpret_cast<float *>(data);
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            typed_ptr[i] = distribution(gen);
        }

        // Execute function and sync
        act_func.run();
        CLScheduler::get().sync();

        // Validate result by checking that the input has no negative values
        for(unsigned int i = 0; i < total_size_in_elems; ++i)
        {
            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
        }

        // Release resources
        tensor.allocator()->free();
        ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
    }
}
#endif // !defined(BARE_METAL)
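
// Seeking to (size - 1) and writing a single byte is the usual trick to grow a
// fresh file to the required length before mapping it; the test is compiled
// out on bare-metal targets, where a filesystem is not guaranteed.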

/** Validates symmetric per channel quantization */
TEST_CASE(Symm8PerChannelQuantizationInfo, framework::DatasetMode::ALL)
{
    // Create tensor
    CLTensor                 tensor;
    const std::vector<float> scale = { 0.25f, 1.4f, 3.2f, 2.3f, 4.7f };
    const TensorInfo         info(TensorShape(32U, 16U), 1, DataType::QSYMM8_PER_CHANNEL, QuantizationInfo(scale));
    tensor.allocator()->init(info);

    // Check quantization information
    ARM_COMPUTE_EXPECT(!tensor.info()->quantization_info().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(!tensor.info()->quantization_info().scale().empty(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(tensor.info()->quantization_info().scale().size() == scale.size(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(tensor.info()->quantization_info().offset().empty(), framework::LogLevel::ERRORS);

    CLQuantization quantization = tensor.quantization();
    ARM_COMPUTE_ASSERT(quantization.scale != nullptr);
    ARM_COMPUTE_ASSERT(quantization.offset != nullptr);

    // Check OpenCL quantization arrays before allocating
    ARM_COMPUTE_EXPECT(quantization.scale->max_num_values() == 0, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(quantization.offset->max_num_values() == 0, framework::LogLevel::ERRORS);

    // Check OpenCL quantization arrays after allocating
    tensor.allocator()->allocate();
    ARM_COMPUTE_EXPECT(quantization.scale->max_num_values() == scale.size(), framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(quantization.offset->max_num_values() == 0, framework::LogLevel::ERRORS);

    // Validate that the scale values are the same (map size is in bytes, not elements)
    auto  cl_scale_buffer = quantization.scale->cl_buffer();
    void *mapped_ptr      = CLScheduler::get().queue().enqueueMapBuffer(cl_scale_buffer, CL_TRUE, CL_MAP_READ, 0, scale.size() * sizeof(float));
    auto  cl_scale_ptr    = static_cast<float *>(mapped_ptr);
    for(unsigned int i = 0; i < scale.size(); ++i)
    {
        ARM_COMPUTE_EXPECT(cl_scale_ptr[i] == scale[i], framework::LogLevel::ERRORS);
    }
    CLScheduler::get().queue().enqueueUnmapMemObject(cl_scale_buffer, mapped_ptr);
}
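
// The per-channel scale values live in a dedicated OpenCL array that is only
// created once the tensor itself is allocated, hence max_num_values() == 0
// beforehand; QSYMM8_PER_CHANNEL is symmetric, so the offset array stays empty.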

TEST_SUITE_END() // TensorAllocator
TEST_SUITE_END() // UNIT
TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute