ComputeLibrary/v21.02/_g_c_g_e_m_m_matrix_multiply_kernel_8cpp_source.xhtml

 /*
  * Copyright (c) 2017-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h"

 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
 #include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
 #include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
 #include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "src/core/AccessWindowStatic.h"
 #include "src/core/AccessWindowTranspose.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"
 #include "support/StringSupport.h"

 #include <set>
 #include <string>

 using namespace arm_compute;
 using namespace arm_compute::misc::shape_calculator;

 namespace
 {
 // Alias used to carry the number of elements processed per iteration:
 // index 0 holds the x count and index 1 the y count (see validate_and_configure_window).
 using ElementsProcessed = Steps;

 /** Check that the operands of the matrix multiplication are consistent with each other.
  *
  * @param[in] input0                    Info of the first operand. Must be single channel F16 or F32.
  * @param[in] input1                    Info of the second operand (matrix B). Same data type as @p input0, at most 3 dimensions.
  * @param[in] output                    Info of the destination. Its shape and data type are only checked when its total size is not zero.
  * @param[in] is_interleaved_transposed True when the operands are expected to be in their reshaped (interleaved / transposed 1xW) layout.
  * @param[in] reshape_info              Provides m, n, k and the reshape multipliers; only read when @p is_interleaved_transposed is true.
  *
  * @return An empty Status when every check passes, otherwise the first error encountered.
  */
 inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
 {
     ARM_COMPUTE_UNUSED(reshape_info);
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3, "The number of dimensions for the matrix B must be <= 3");

     if(is_interleaved_transposed)
     {
         // GEMM dimensions as described by the reshape information
         const int m = reshape_info.m();
         const int n = reshape_info.n();
         const int k = reshape_info.k();

         // Shape of matrix A before the reshape: k along dimension 0, m along dimension 1
         TensorShape shape_a{ input0->tensor_shape() };
         shape_a.set(0, k);
         shape_a.set(1, m);

         // Shape of matrix B before the reshape: n along dimension 0, k along dimension 1
         TensorShape shape_b{ input1->tensor_shape() };
         shape_b.set(0, n);
         shape_b.set(1, k);

         const TensorInfo info_a = input0->clone()->set_tensor_shape(shape_a);
         const TensorInfo info_b = input1->clone()->set_tensor_shape(shape_b);

         // Shapes the reshape helpers compute from the non-reshaped matrices
         const TensorInfo expected_reshaped_a = input0->clone()->set_tensor_shape(compute_interleaved_shape(info_a, reshape_info.mult_interleave4x4_height()));
         const TensorInfo expected_reshaped_b = input1->clone()->set_tensor_shape(compute_transpose1xW_with_element_size_shape(info_b, reshape_info.mult_transpose1xW_width()));

         // The tensors handed to the kernel must already have those reshaped shapes
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input0, &expected_reshaped_a);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, &expected_reshaped_b);

         // An already initialised destination has to be n wide and m high
         if(output->total_size() != 0)
         {
             ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != static_cast<size_t>(n));
             ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != static_cast<size_t>(m));
             ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
         }

         return Status{};
     }

     // Plain layout: dimension 0 of the first operand must match dimension 1 of the second
     ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1));

     // An already initialised destination takes its width from matrix B and its height from matrix A
     if(output->total_size() != 0)
     {
         ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != output->dimension(0));
         ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != output->dimension(1));
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
     }

     return Status{};
 }

 /** Auto-initialise the output info if it is empty, then compute the execution window and the padding each tensor needs.
  *
  * @param[in,out] input0                    Info of the first operand; handed to the access patterns, which may update its padding.
  * @param[in,out] input1                    Info of the second operand; handed to the access patterns, which may update its padding.
  * @param[in,out] output                    Info of the destination; initialised from @p input0 when empty, and its valid region is set.
  * @param[in]     is_interleaved_transposed True when the operands are in their reshaped layout.
  * @param[in]     reshape_info              Source of m and n for the output shape when @p is_interleaved_transposed is true.
  * @param[in]     gpu_target                Currently unused.
  * @param[out]    num_elements_processed    Receives the elements processed per iteration along x (index 0) and y (index 1).
  *
  * @return A pair made of the status and the configured window. The status is an "Insufficient Padding!" runtime error
  *         when update_window_and_padding() reports that the window had to be changed.
  */
 inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output,
                                                                bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info,
                                                                GPUTarget gpu_target, ElementsProcessed &num_elements_processed)
 {
     ARM_COMPUTE_UNUSED(gpu_target);

     // Output tensor auto initialization if not yet initialized
     TensorShape tensor_shape{ input0->tensor_shape() };
     tensor_shape.set(0, is_interleaved_transposed ? reshape_info.n() : input1->dimension(0));
     tensor_shape.set(1, is_interleaved_transposed ? reshape_info.m() : input0->dimension(1));

     auto_init_if_empty(*output, input0->clone()->set_tensor_shape(tensor_shape));

     bool   window_changed = false;
     Window win{};

     const DataType data_type                           = input0->data_type();
     unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
     unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];

     if(is_interleaved_transposed)
     {
         // Configure window kernel
         num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(data_type);
         num_elems_processed_per_iteration_y = 4;

         win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));

         AccessWindowRectangle input0_access(input0, 0, 0, num_elems_processed_per_iteration_y, 1, 1.f, 0.25f);
         AccessWindowTranspose input1_access(input1, 0, 0, num_elems_processed_per_iteration_x, 1, 0.f, 0.25f);
         AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);

         // Keep the result: it is the only way the insufficient-padding error below can ever be raised
         window_changed = update_window_and_padding(win, input0_access, input1_access, output_access);

         output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
     }
     else // The input tensors have not been reshaped
     {
         // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor: never process more rows than the output has
         num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->dimension(1)), 4);

         switch(data_type)
         {
             case DataType::F16:
                 num_elems_processed_per_iteration_x = 4;
                 break;

             case DataType::F32:
                 num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(data_type);
                 break;

             default:
                 ARM_COMPUTE_ERROR("Current data type is not supported");
                 break;
         }

         win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));

         // Static access regions: input0 is rounded up to a multiple of 8 along x and of the y step along y,
         // input1 is rounded up to a multiple of the x step along x
         AccessWindowStatic    input0_access(input0, 0, 0, ceil_to_multiple(input0->dimension(0), 8), ceil_to_multiple(input0->dimension(1), num_elems_processed_per_iteration_y));
         AccessWindowStatic    input1_access(input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x), input1->dimension(1));
         AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);

         // Keep the result: it is the only way the insufficient-padding error below can ever be raised
         window_changed = update_window_and_padding(win, input0_access, input1_access, output_access);

         Coordinates coord;
         coord.set_num_dimensions(output->num_dimensions());
         output_access.set_valid_region(win, ValidRegion(coord, output->tensor_shape()));
     }

     Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
     return std::make_pair(err, win);
 }
 } // namespace

 GCGEMMMatrixMultiplyKernel::GCGEMMMatrixMultiplyKernel()
     : _input0(nullptr), _input1(nullptr), _output(nullptr)
 {
 }

 void GCGEMMMatrixMultiplyKernel::configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);

     // Perform validate step
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info));

     _input0 = input0;
     _input1 = input1;
     _output = output;

     // Get target architecture
     GPUTarget gpu_target = get_target();

     ElementsProcessed num_elements_processed{};

     // Configure kernel window
     auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info, gpu_target, num_elements_processed);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
     IGCKernel::configure(win_config.second);

     // Create build options
     std::set<std::string> build_opts;
     std::string           kernel_name;

     build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
     build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1));
     build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
     build_opts.emplace("#define COLS_A " + support::cpp11::to_string(input0->info()->dimension(0)));
     build_opts.emplace("#define COLS_B " + support::cpp11::to_string(input1->info()->dimension(0)));
     build_opts.emplace("#define ALPHA " + float_to_string_with_full_precision(alpha));

     // Check if the output tensor is a vector. If so,the kernel runs the vector-matrix multiplication
     if(is_interleaved_transposed)
     {
         const int mult_transpose1xW_width   = reshape_info.mult_transpose1xW_width();
         const int mult_interleave4x4_height = reshape_info.mult_interleave4x4_height();

         build_opts.emplace("#define MULT_TRANSPOSE1XW_WIDTH " + support::cpp11::to_string(mult_transpose1xW_width));
         build_opts.emplace("#define MULT_INTERLEAVE4X4_HEIGHT " + support::cpp11::to_string(mult_interleave4x4_height));

         switch(input0->info()->data_type())
         {
             case DataType::F16:
                 build_opts.emplace("#define DATA_TYPE_FP16");
                 break;

             case DataType::F32:
                 build_opts.emplace("#define DATA_TYPE_FP32");
                 break;

             default:
                 ARM_COMPUTE_ERROR("Current data type is not supported");
                 break;
         }

         build_opts.emplace("#define GEMM_MM_INTERLEAVED_TRANSPOSED");

         kernel_name = "gemm_mm_interleaved_transposed";
     }
     else
     {
         // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor

         GPUTarget arch_target = get_arch_from_target(gpu_target);
         switch(input0->info()->data_type())
         {
             case DataType::F16:
                 build_opts.emplace("#define DATA_TYPE_FP16");
                 build_opts.emplace("#define MM_PROCESS_4X_OPTIMIZED");
                 build_opts.emplace("#define GEMM_MM_FLOATING_POINT");
                 break;

             case DataType::F32:
                 build_opts.emplace("#define DATA_TYPE_FP32");

                 if(arch_target == GPUTarget::BIFROST && input0->info()->num_dimensions() != 1)
                 {
                     build_opts.emplace("#define GEMM_MM_FLOATING_POINT_BIFROST");
                 }
                 else
                 {
                     build_opts.emplace("#define GEMM_MM_FLOATING_POINT");
                 }
                 break;

             default:
                 ARM_COMPUTE_ERROR("Current data type is not supported");
                 break;
         }

         build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_X " + support::cpp11::to_string(num_elements_processed.x()));
         build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_Y " + support::cpp11::to_string(num_elements_processed.y()));

         kernel_name = "gemm_mm_floating_point";
     }

     // Create kernel
     _kernel = GCKernelLibrary::get().create_kernel(kernel_name, build_opts);
 }

 /** Check whether the given tensor infos would lead to a valid kernel configuration, without modifying them.
  *
  * @param[in] input0                    Info of the first operand.
  * @param[in] input1                    Info of the second operand.
  * @param[in] output                    Info of the destination.
  * @param[in] alpha                     Unused by the validation.
  * @param[in] is_interleaved_transposed True when the operands are in their reshaped layout.
  * @param[in] reshape_info              GEMM dimensions and reshape multipliers.
  * @param[in] gpu_target                GPU target forwarded to the window configuration.
  *
  * @return An empty Status on success, otherwise the error reported by the argument or window validation.
  */
 Status GCGEMMMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed,
                                             const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target)
 {
     ARM_COMPUTE_UNUSED(alpha);

     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, is_interleaved_transposed, reshape_info));

     // The window configuration writes to the infos it receives, so it is run on clones of the caller's objects
     const auto input0_clone = input0->clone();
     const auto input1_clone = input1->clone();
     const auto output_clone = output->clone();

     ElementsProcessed elements_per_iteration{};
     const auto        window_result = validate_and_configure_window(input0_clone.get(), input1_clone.get(), output_clone.get(),
                                                                     is_interleaved_transposed, reshape_info, gpu_target, elements_per_iteration);
     ARM_COMPUTE_RETURN_ON_ERROR(window_result.first);

     return Status{};
 }

 /** Enqueue the shader once for every 2D slice of the given window.
  *
  * @param[in] window Region to execute; must be a valid sub-window of the window set at configure time.
  */
 void GCGEMMMatrixMultiplyKernel::run(const Window &window)
 {
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window);

     _kernel.use();

     Window slice = window.first_slice_window_2D();

     // Fixed slice for matrix B, derived from the first slice with X and Y collapsed to [0, 1) step 1
     Window slice_matrix_b = slice;
     slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1));
     slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1));

     do
     {
         // When matrix B has fewer than 3 dimensions it is not slid along z with matrix A: the fixed slice is
         // bound on every iteration. This scenario can happen when the matrix multiplication is used to perform
         // a convolution operation.
         const bool    b_has_no_z = _input1->info()->num_dimensions() < 3;
         const Window &slice_b    = b_has_no_z ? slice_matrix_b : slice;

         // Bind A, B and the destination to binding points 1, 2 and 3
         unsigned int idx = 0;
         add_2D_tensor_argument(idx, _input0, 1, slice);
         add_2D_tensor_argument(idx, _input1, 2, slice_b);
         add_2D_tensor_argument(idx, _output, 3, slice);

         _kernel.update_shader_params();
         enqueue(*this, slice);
     }
     while(window.slide_window_slice_2D(slice));
 }
arm_compute::Window::first_slice_window_2D
Window first_slice_window_2D() const
First 2D slice of the window.
Definition: Window.h:283

arm_compute::ITensorInfo::num_dimensions
virtual size_t num_dimensions() const =0
The number of dimensions of the tensor (rank)

arm_compute::calculate_max_window
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
Definition: WindowHelpers.cpp:28

arm_compute::GEMMReshapeInfo::mult_interleave4x4_height
int mult_interleave4x4_height() const
Multiplication factor for the height of the 4x4 interleaved block.
Definition: Types.h:1893

WindowHelpers.h

arm_compute::IKernel::window
const Window & window() const
The maximum window the kernel can be executed on.
Definition: IKernel.cpp:28

arm_compute::TensorShape
Shape of a tensor.
Definition: TensorShape.h:39

arm_compute::GCGEMMMatrixMultiplyKernel::run
void run(const Window &window) override
Enqueue the OpenGL ES shader to process the given window.
Definition: GCGEMMMatrixMultiplyKernel.cpp:306

arm_compute::misc::shape_calculator::compute_transpose1xW_with_element_size_shape
TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInfo &b, int mult_transpose1xW_width=1)
Calculate the transposed 1xW width element shape.
Definition: ShapeCalculator.h:337

arm_compute::enqueue
void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws=gles::NDRange(1U, 1U, 1U))
Add the kernel to the command queue with the given window.
Definition: IGCKernel.cpp:41

arm_compute::ITensorInfo::dimension
virtual size_t dimension(size_t index) const =0
Return the size of the requested dimension.

ARM_COMPUTE_ERROR
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
Definition: Error.h:352

GCGEMMMatrixMultiplyKernel.h

arm_compute::GEMMReshapeInfo::mult_transpose1xW_width
int mult_transpose1xW_width() const
Multiplication factor for the width of the 1xW transposed block.
Definition: Types.h:1885

arm_compute::GEMMReshapeInfo
GEMM reshape information class.
Definition: Types.h:1831

ARM_COMPUTE_RETURN_ON_ERROR
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Definition: Error.h:204

arm_compute::support::cpp11::to_string
std::string to_string(T &&value)
Convert integer and float values to string.
Definition: StringSupport.h:162

arm_compute::ITensorInfo::data_type
virtual DataType data_type() const =0
Data type used for each element of the tensor.

Window.h

arm_compute::Format::F32
1 channel, 1 F32 per channel

IGCTensor.h

arm_compute::GCGEMMMatrixMultiplyKernel::GCGEMMMatrixMultiplyKernel
GCGEMMMatrixMultiplyKernel()
Default constructor.
Definition: GCGEMMMatrixMultiplyKernel.cpp:183

arm_compute::ITensorInfo
Store the tensor's metadata.
Definition: ITensorInfo.h:40

arm_compute::IGCTensor
Interface for GLES Compute tensor.
Definition: IGCTensor.h:35

ARM_COMPUTE_ERROR_THROW_ON
#define ARM_COMPUTE_ERROR_THROW_ON(status)
Definition: Error.h:455

arm_compute::Window::Dimension
Describe one of the image's dimensions with a start, end and step.
Definition: Window.h:77

arm_compute::Status
Status class.
Definition: Error.h:52

arm_compute::get_arch_from_target
GPUTarget get_arch_from_target(GPUTarget target)
Helper function to get the GPU arch.
Definition: GPUTarget.cpp:189

TensorInfo.h

ARM_COMPUTE_RETURN_ERROR_ON
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
Definition: Error.h:296

arm_compute::misc::shape_calculator::compute_interleaved_shape
TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height=1, bool reinterpret_input_as_3d=false)
Calculate the interleaved shape of an input tensor.
Definition: ShapeCalculator.h:260

arm_compute::Window::slide_window_slice_2D
bool slide_window_slice_2D(Window &slice) const
Slide the passed 2D window slice.
Definition: Window.h:323

arm_compute
Copyright (c) 2017-2021 Arm Limited.
Definition: 00_introduction.dox:24

arm_compute::Format::F16
1 channel, 1 F16 per channel

arm_compute::AccessWindowStatic
Implementation of a static rectangular access pattern.
Definition: AccessWindowStatic.h:46

ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR
#define ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(...)
Definition: Validate.h:163

arm_compute::GEMMReshapeInfo::n
int n() const
Number of matrix B columns.
Definition: Types.h:1869

Utils.h

arm_compute::test::validation::data_type
const DataType data_type
Definition: Im2Col.cpp:150

arm_compute::AccessWindowRectangle
Implementation of a rectangular access pattern.
Definition: IAccessWindow.h:107

StringSupport.h

arm_compute::Window::DimX
static constexpr size_t DimX
Alias for dimension 0 also known as X dimension.
Definition: Window.h:43

arm_compute::update_window_and_padding
bool update_window_and_padding(Window &win, Ts &&... patterns)
Update window and padding size for each of the access patterns.
Definition: WindowHelpers.h:46

ARM_COMPUTE_UNUSED
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
Definition: Error.h:152

arm_compute::float_to_string_with_full_precision
std::string float_to_string_with_full_precision(float val)
Create a string with the float in full precision.
Definition: Utils.h:1262

arm_compute::ITensorInfo::tensor_shape
virtual const TensorShape & tensor_shape() const =0
Size for each dimension of the tensor.

arm_compute::ceil_to_multiple
auto ceil_to_multiple(S value, T divisor) -> decltype(((value+divisor - 1)/divisor) *divisor)
Computes the smallest number larger or equal to value that is a multiple of divisor.
Definition: Utils.h:71

GCKernelLibrary.h
Manages all the GLES kernels compilation and caching, provides accessors for the GLES Context...

arm_compute::Steps
Class to describe a number of elements in each dimension.
Definition: Steps.h:40

arm_compute::Coordinates
Coordinates of an item.
Definition: Coordinates.h:37

arm_compute::GCGEMMMatrixMultiplyKernel::validate
static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target)
Static function to check if given info will lead to a valid configuration of GCGEMMMatrixMultiplyKern...
Definition: GCGEMMMatrixMultiplyKernel.cpp:289

arm_compute::GCGEMMMatrixMultiplyKernel::configure
void configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed=true, const GEMMReshapeInfo &reshape_info=GEMMReshapeInfo())
Initialise the kernel's input, output and alpha.
Definition: GCGEMMMatrixMultiplyKernel.cpp:188

kernel_name
std::string kernel_name
Definition: CLIm2ColKernel.cpp:52

arm_compute::auto_init_if_empty
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment i...
Definition: AutoConfiguration.h:42

ShapeCalculator.h

arm_compute::misc::ICloneable::clone
virtual std::unique_ptr< T > clone() const =0
Provide a clone of the current object of class T.

Validate.h

arm_compute::ITensor::info
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.

arm_compute::data_size_from_type
size_t data_size_from_type(DataType data_type)
The size in bytes of the data type.
Definition: Utils.h:106

arm_compute::GEMMReshapeInfo::k
int k() const
Number of matrix A columns or matrix B rows.
Definition: Types.h:1877

arm_compute::Window::set
void set(size_t dimension, const Dimension &dim)
Set the values of a given dimension.
Definition: Window.inl:49

Error.h

AccessWindowTranspose.h

arm_compute::IGCKernel::add_2D_tensor_argument
void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window)
Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx...
Definition: IGCKernel.cpp:127

arm_compute::ErrorCode::RUNTIME_ERROR
Generic runtime error.

ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
Definition: Validate.h:941

arm_compute::Window::DimY
static constexpr size_t DimY
Alias for dimension 1 also known as Y dimension.
Definition: Window.h:45

OpenGLES.h
Wrapper to configure the Khronos EGL and OpenGL ES C header.

arm_compute::GCKernelLibrary::get
static GCKernelLibrary & get()
Get the static instance of GCKernelLibrary.
Definition: GCKernelLibrary.cpp:333

GCHelpers.h

AutoConfiguration.h

arm_compute::ITensorInfo::total_size
virtual size_t total_size() const =0
Returns the total size of the tensor in bytes.

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
Definition: Validate.h:443

ARM_COMPUTE_CREATE_ERROR
#define ARM_COMPUTE_CREATE_ERROR(error_code, msg)
Creates an error with a given message.
Definition: Error.h:159

arm_compute::AccessWindowTranspose
Implementation of a XY-transpose access pattern.
Definition: AccessWindowTranspose.h:38

arm_compute::GPUTarget
GPUTarget
Available GPU Targets.
Definition: GPUTarget.h:34

arm_compute::GEMMReshapeInfo::m
int m() const
Number of matrix A rows.
Definition: Types.h:1861

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
Definition: Validate.h:545

ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN
#define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c,...)
Definition: Validate.h:792

arm_compute::validate_arguments
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage)
Definition: NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp:45

arm_compute::GCKernelLibrary::create_kernel
GCKernel create_kernel(const std::string &shader_name, const StringSet &build_options_set={}) const
Creates a kernel from the kernel library.
Definition: GCKernelLibrary.cpp:366

ARM_COMPUTE_RETURN_ERROR_ON_MSG
#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg)
If the condition is true, an error is returned.
Definition: Error.h:244

ARM_COMPUTE_ERROR_ON_NULLPTR
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
Definition: Validate.h:161

arm_compute::TensorInfo
Store the tensor's metadata.
Definition: TensorInfo.h:45

Helpers.h

arm_compute::misc::shape_calculator
Definition: ShapeCalculator.h:40

AccessWindowStatic.h

arm_compute::Dimensions::set_num_dimensions
void set_num_dimensions(size_t num_dimensions)
Set number of dimensions.
Definition: Dimensions.h:149

arm_compute::GPUTarget::BIFROST

arm_compute::IGCKernel::get_target
GPUTarget get_target() const
Get the targeted GPU architecture.
Definition: IGCKernel.h:122

arm_compute::ValidRegion
Container for valid region of a window.
Definition: Types.h:188

Types.h

arm_compute::DataType
DataType
Available data types.
Definition: Types.h:77

arm_compute::Window
Describe a multidimensional execution window.
Definition: Window.h:39

arm_compute::TensorShape::set
TensorShape & set(size_t dimension, size_t value, bool apply_dim_correction=true, bool increase_dim_unit=true)
Accessor to set the value of one of the dimensions.
Definition: TensorShape.h:79

ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
Definition: Validate.h:205

arm_compute::test::validation::reference::slice
SimpleTensor< T > slice(const SimpleTensor< T > &src, Coordinates starts, Coordinates ends)
Definition: SliceOperations.cpp:38