Compute Library
 22.08
space_to_batch.cl
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "helpers.h"
25 
#if defined(BATCH_SIZE) && defined(BATCH_IN) && defined(DATA_TYPE) && defined(WIDTH_IN) && defined(HEIGHT_IN)
/** Calculate the space to batch conversion (NCHW), reading the paddings and the block shape from tensors at run time.
 *
 * Each work-item handles one output element of the output batch selected by @p batch_id. The element is copied
 * from the input only when its position falls inside the window
 * [pad_left, pad_left + WIDTH_IN) x [pad_left, pad_left + HEIGHT_IN); otherwise nothing is written.
 *
 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
 * @note -DBATCH_IN must be passed at compile time. It is used to split @p batch_id into the input batch index
 *       (batch_id % BATCH_IN) and the position inside the block (batch_id / BATCH_IN). e.g. -DBATCH_IN=2
 * @note -DWIDTH_IN and -DHEIGHT_IN must be passed at compile time. They give the extent along X and Y of the
 *       region that is copied from the input. e.g. -DWIDTH_IN=8 -DHEIGHT_IN=8
 * @note -DBATCH_SIZE is required by the compile guard but is not referenced by the kernel body.
 * @note The block shape tensor rank was documented as a compile time argument (-DBLOCK_SHAPE_DIM); the kernel body
 *       does not reference it and always reads two block shape values (x at index 0, y at index 1).
 *
 * @param[in]  input_ptr                                 Pointer to the source tensor. Supported data types: All
 * @param[in]  input_stride_x                            Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                              input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                            Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                              input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_stride_z                            Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                              input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes       The offset of the first element in the source tensor
 * @param[in]  paddings_ptr                              Pointer to the paddings tensor. Supported data types: S32
 * @param[in]  paddings_stride_x                         Stride of the paddings tensor in X dimension (in bytes)
 * @param[in]  paddings_step_x                           paddings_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  paddings_stride_y                         Stride of the paddings tensor in Y dimension (in bytes)
 * @param[in]  paddings_step_y                           paddings_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  paddings_offset_first_element_in_bytes    The offset of the first element in the paddings tensor
 * @param[in]  block_shape_ptr                           Pointer to the block shape tensor. Supported data types: S32
 * @param[in]  block_shape_stride_x                      Stride of the block shape tensor in X dimension (in bytes)
 * @param[in]  block_shape_step_x                        block_shape_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  block_shape_offset_first_element_in_bytes The offset of the first element in the block shape tensor
 * @param[in]  batch_id                                  The output tensor batch id
 * @param[out] output_ptr                                Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                           Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                             output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                           Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                             output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_stride_z                           Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                             output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes      The offset of the first element in the destination tensor
 */
__kernel void space_to_batch_nchw(
    TENSOR4D_DECLARATION(input),
    IMAGE_DECLARATION(paddings),
    VECTOR_DECLARATION(block_shape),
    const int batch_id,
    TENSOR3D_DECLARATION(output))
{
    // The input is addressed explicitly through tensor4D_offset(), so it is converted without stepping.
    Tensor4D in    = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
    Image    pad   = CONVERT_TO_IMAGE_STRUCT_NO_STEP(paddings);
    Vector   block = CONVERT_TO_VECTOR_STRUCT_NO_STEP(block_shape);
    // NOTE(review): out.ptr is expected to address this work-item's output element - confirm against helpers.h.
    Tensor3D out   = CONVERT_TO_TENSOR3D_STRUCT(output);

    // Only the leading pads are needed: the trailing edge of the copy window is derived from WIDTH_IN / HEIGHT_IN.
    const int pad_left_x = *((__global int *)offset(&pad, 0, 0));
    const int pad_left_y = *((__global int *)offset(&pad, 0, 1));

    const int block_x = *((__global int *)vector_offset(&block, 0));
    const int block_y = *((__global int *)vector_offset(&block, 1));

    const int out_x = get_global_id(0);
    const int out_y = get_global_id(1);
    const int z     = get_global_id(2);

    // batch_id / BATCH_IN is the index inside the block: its remainder by block_x is the x offset, its quotient the y offset.
    const int block_idx = batch_id / BATCH_IN;
    const int pos_x     = out_x * block_x + (block_idx % block_x);
    const int pos_y     = out_y * block_y + (block_idx / block_x);

    const bool inside_y = (pos_y >= pad_left_y) && (pos_y < pad_left_y + HEIGHT_IN);
    const bool inside_x = (pos_x >= pad_left_x) && (pos_x < pad_left_x + WIDTH_IN);

    // Positions outside the window are left untouched in the output.
    if(inside_y && inside_x)
    {
        const int w    = batch_id % BATCH_IN;
        const int in_x = pos_x - pad_left_x;
        const int in_y = pos_y - pad_left_y;

        *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_x, in_y, z, w));
    }
}

#endif // defined(BATCH_SIZE) && defined(BATCH_IN) && defined(DATA_TYPE) && defined(WIDTH_IN) && defined(HEIGHT_IN)
98 
#if defined(BATCH_SIZE) && defined(BATCH_IN) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) && defined(WIDTH_IN) && defined(HEIGHT_IN)
/** Calculate the space to batch conversion (NCHW) with the block shape and the paddings fixed at compile time.
 *
 * Each work-item handles one output element of the output batch selected by @p batch_id. The element is copied
 * from the input only when its position falls inside the window
 * [PAD_LEFT_X, PAD_LEFT_X + WIDTH_IN) x [PAD_LEFT_Y, PAD_LEFT_Y + HEIGHT_IN); otherwise nothing is written.
 *
 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
 * @note The input tensor batch size must be passed at compile time using -DBATCH_SIZE. e.g. -DBATCH_SIZE=2
 *       (required by the compile guard; the kernel body itself only references BATCH_IN)
 * @note -DBATCH_IN must be passed at compile time. It is used to split @p batch_id into the input batch index
 *       (batch_id % BATCH_IN) and the position inside the block (batch_id / BATCH_IN). e.g. -DBATCH_IN=2
 * @note -DWIDTH_IN and -DHEIGHT_IN must be passed at compile time. They give the extent along X and Y of the
 *       region that is copied from the input. e.g. -DWIDTH_IN=8 -DHEIGHT_IN=8
 * @note The block shape x must be passed at compile time using -DBLOCK_SHAPE_X. e.g. -DBLOCK_SHAPE_X=2
 * @note The block shape y must be passed at compile time using -DBLOCK_SHAPE_Y. e.g. -DBLOCK_SHAPE_Y=2
 * @note The starting pad value of x must be passed at compile time using -DPAD_LEFT_X. e.g. -DPAD_LEFT_X=2
 * @note The ending pad value of x must be passed at compile time using -DPAD_RIGHT_X. e.g. -DPAD_RIGHT_X=2
 * @note The starting pad value of y must be passed at compile time using -DPAD_LEFT_Y. e.g. -DPAD_LEFT_Y=2
 * @note The ending pad value of y must be passed at compile time using -DPAD_RIGHT_Y. e.g. -DPAD_RIGHT_Y=2
 *
 * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: All
 * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
 * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
 * @param[in]  batch_id                             The output tensor batch id
 * @param[out] output_ptr                           Pointer to the destination tensor. Supported data types: same as @p input_ptr
 * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
 */
__kernel void space_to_batch_static_nchw(
    TENSOR4D_DECLARATION(input),
    const int batch_id,
    TENSOR3D_DECLARATION(output))
{
    // The input is addressed explicitly through tensor4D_offset(), so it is converted without stepping.
    Tensor4D in  = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
    // NOTE(review): out.ptr is expected to address this work-item's output element - confirm against helpers.h.
    Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);

    // Bind the compile-time values to typed locals so that expression-valued -D options are evaluated once, as ints.
    const int block_x = BLOCK_SHAPE_X;
    const int block_y = BLOCK_SHAPE_Y;

    const int out_x = get_global_id(0);
    const int out_y = get_global_id(1);
    const int z     = get_global_id(2);

    // batch_id / BATCH_IN is the index inside the block: its remainder by block_x is the x offset, its quotient the y offset.
    const int block_idx = batch_id / BATCH_IN;
    const int pos_x     = out_x * block_x + (block_idx % block_x);
    const int pos_y     = out_y * block_y + (block_idx / block_x);

    const bool inside_y = (pos_y >= PAD_LEFT_Y) && (pos_y < PAD_LEFT_Y + HEIGHT_IN);
    const bool inside_x = (pos_x >= PAD_LEFT_X) && (pos_x < PAD_LEFT_X + WIDTH_IN);

    // Positions outside the window are left untouched in the output.
    if(inside_y && inside_x)
    {
        const int w    = batch_id % BATCH_IN;
        const int in_x = pos_x - PAD_LEFT_X;
        const int in_y = pos_y - PAD_LEFT_Y;

        *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_x, in_y, z, w));
    }
}
#endif // defined(BATCH_SIZE) && defined(BATCH_IN) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) && defined(WIDTH_IN) && defined(HEIGHT_IN)
Structure to hold Vector information.
Definition: helpers.h:889
__global uchar * offset(const Image *img, int x, int y)
Get the pointer position of a Image.
Definition: helpers.h:1084
SimpleTensor< float > w
Definition: DFT.cpp:156
#define IMAGE_DECLARATION(name)
Definition: helpers.h:805
Structure to hold 3D tensor information.
Definition: helpers.h:906
Structure to hold 4D tensor information.
Definition: helpers.h:916
#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size)
Definition: helpers.h:881
#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name)
Definition: helpers.h:858
#define VECTOR_DECLARATION(name)
Definition: helpers.h:799
__global const uchar * tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
Get the pointer position of a Tensor4D.
Definition: helpers.h:1109
Structure to hold Image information.
Definition: helpers.h:897
#define CONVERT_TO_TENSOR3D_STRUCT(name)
Definition: helpers.h:870
__global const uchar * vector_offset(const Vector *vec, int x)
Get the pointer position of a Vector.
Definition: helpers.h:1073
#define TENSOR4D_DECLARATION(name)
Definition: helpers.h:823
__global uchar * ptr
Pointer to the starting postion of the buffer.
Definition: helpers.h:908
#define TENSOR3D_DECLARATION(name)
Definition: helpers.h:813
#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name)
Definition: helpers.h:852