21.02
|
Go to the source code of this file.
Macros | |
#define | ARM_DOT1(a, b, c) |
#define | ARM_DOT2(a, b, c) |
#define | ARM_DOT3(a, b, c) |
#define | ARM_DOT4(a, b, c) |
#define | ARM_DOT8(a, b, c) |
#define | ARM_DOT16(a, b, c) |
#define | LINEAR_2_COORDS(i) |
#define | MI_INIT(i) |
#define | TENSOR_DOT(K0, i) ARM_DOT_K0XN0(K0, a##i, b, c##i); |
Functions | |
__kernel void | direct_convolution_nhwc (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_stride_z, uint src_step_z, uint src_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_stride_z, uint dst_step_z, uint dst_offset_first_element_in_bytes, __global uchar *wei_ptr, uint wei_stride_x, uint wei_step_x, uint wei_stride_y, uint wei_step_y, uint wei_stride_z, uint wei_step_z, uint wei_offset_first_element_in_bytes, __global uchar *bia_ptr, uint bia_stride_x, uint bia_step_x, uint bia_offset_first_element_in_bytes, unsigned int wei_stride_w) |
OpenCL kernel to compute the direct convolution. More... | |
#define ARM_DOT1 | ( | a, | |
b, | |||
c | |||
) |
Definition at line 266 of file direct_convolution.cl.
#define ARM_DOT16 | ( | a, | |
b, | |||
c | |||
) |
Definition at line 290 of file direct_convolution.cl.
#define ARM_DOT2 | ( | a, | |
b, | |||
c | |||
) |
Definition at line 270 of file direct_convolution.cl.
#define ARM_DOT3 | ( | a, | |
b, | |||
c | |||
) |
Definition at line 275 of file direct_convolution.cl.
#define ARM_DOT4 | ( | a, | |
b, | |||
c | |||
) |
Definition at line 280 of file direct_convolution.cl.
#define ARM_DOT8 | ( | a, | |
b, | |||
c | |||
) |
Definition at line 285 of file direct_convolution.cl.
#define LINEAR_2_COORDS | ( | i | ) |
Referenced by direct_convolution_nhwc().
#define MI_INIT | ( | i | ) |
Referenced by direct_convolution_nhwc().
#define TENSOR_DOT | ( | K0, | |
i | |||
) | ARM_DOT_K0XN0(K0, a##i, b, c##i); |
Referenced by direct_convolution_nhwc().
__kernel void direct_convolution_nhwc | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_stride_z, | ||
uint | src_step_z, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_stride_z, | ||
uint | dst_step_z, | ||
uint | dst_offset_first_element_in_bytes, | ||
__global uchar * | wei_ptr, | ||
uint | wei_stride_x, | ||
uint | wei_step_x, | ||
uint | wei_stride_y, | ||
uint | wei_step_y, | ||
uint | wei_stride_z, | ||
uint | wei_step_z, | ||
uint | wei_offset_first_element_in_bytes, | ||
__global uchar * | bia_ptr, | ||
uint | bia_stride_x, | ||
uint | bia_step_x, | ||
uint | bia_offset_first_element_in_bytes, | ||
unsigned int | wei_stride_w | ||
) |
OpenCL kernel to compute the direct convolution.
[in] | src_ptr | Pointer to the source tensor. Supported data type: F16/F32 |
[in] | src_stride_x | Stride of the source tensor in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem (in bytes) |
[in] | src_stride_y | Stride of the source tensor in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem (in bytes) |
[in] | src_stride_z | Stride of the source tensor in Z dimension (in bytes) |
[in] | src_step_z | src_stride_z * number of elements along Z processed per workitem (in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source tensor |
[out] | dst_ptr | Pointer to the destination tensor. Supported data type: same as src_ptr |
[in] | dst_stride_x | Stride of the destination tensor in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem (in bytes) |
[in] | dst_stride_y | Stride of the destination tensor in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem (in bytes) |
[in] | dst_stride_z | Stride of the destination tensor in Z dimension (in bytes) |
[in] | dst_step_z | dst_stride_z * number of elements along Z processed per workitem (in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination tensor |
[in] | wei_ptr | Pointer to the weights tensor. Supported data type: same as src_ptr |
[in] | wei_stride_x | Stride of the weights tensor in X dimension (in bytes) |
[in] | wei_step_x | wei_stride_x * number of elements along X processed per workitem (in bytes) |
[in] | wei_stride_y | Stride of the weights tensor in Y dimension (in bytes) |
[in] | wei_step_y | wei_stride_y * number of elements along Y processed per workitem (in bytes) |
[in] | wei_stride_z | Stride of the weights tensor in Z dimension (in bytes) |
[in] | wei_step_z | wei_stride_z * number of elements along Z processed per workitem (in bytes) |
[in] | wei_offset_first_element_in_bytes | The offset of the first element in the weights tensor |
[in] | bia_ptr | (Optional) Pointer to the bias tensor. Supported data type: same as src_ptr (if F32/F16) or S32 (if QASYMM8/QASYMM8_SIGNED) |
[in] | bia_stride_x | (Optional) Stride of the bias tensor in X dimension (in bytes) |
[in] | bia_step_x | (Optional) bia_stride_x * number of elements along X processed per workitem (in bytes) |
[in] | bia_offset_first_element_in_bytes | (Optional) The offset of the first element in the bias tensor |
[in] | wei_stride_w | Stride of the weights tensor in W dimension (in bytes) |
Definition at line 459 of file direct_convolution.cl.
References ADD_BLOCK_BROADCAST, arm_compute::test::validation::b, LINEAR_2_COORDS, LOAD_BLOCK, LOAD_BLOCK_INDIRECT, MI_INIT, PARTIAL_STORE_N0, QUANTIZE, REPEAT_VAR_INIT_TO_CONST, STORE_VECTOR_SELECT, TENSOR_DOT, and VEC_DATA_TYPE.