OpenGL ES SDK for Android ARM Developer Center
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sort.cpp
Go to the documentation of this file.
1 /* Copyright (c) 2014-2017, ARM Limited and Contributors
2  *
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge,
6  * to any person obtaining a copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation the rights to
8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
9  * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
14  * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19  */
20 
21 #include "sort.h"
22 #include "common/matrix.h"
23 #include "common/glutil.h"
24 #include "common/shader.h"
25 #include "common/common.h"
26 #include <string.h>
27 
28 #define MAX_SCAN_LEVELS 4
29 
30 Shader
35 
36 GLuint
39  buf_flags,
40  buf_sorted;
41 
42 unsigned scan_levels;
43 
44 bool sort_init()
45 {
46  string res = "/data/data/com.arm.malideveloper.openglessdk.computeparticles/files/";
47  if (!shader_scan.load_compute_from_file(res + "scan.cs") ||
48  !shader_scan_first.load_compute_from_file(res + "scan_first.cs") ||
49  !shader_resolve.load_compute_from_file(res + "scan_resolve.cs") ||
50  !shader_reorder.load_compute_from_file(res + "scan_reorder.cs"))
51  {
52  return false;
53  }
54 
55  if (!shader_scan.link() ||
57  !shader_resolve.link() ||
59  {
60  return false;
61  }
62 
63  // We do the scan recursively. We have to do scan until one the entire dispatch can be computed by a single work group.
64  unsigned elems = NUM_KEYS;
65  scan_levels = 0;
66  while (elems > 1)
67  {
68  scan_levels++;
69  elems = (elems + BLOCK_SIZE - 1) / BLOCK_SIZE;
70  }
71 
72  buf_sorted = gen_buffer(GL_SHADER_STORAGE_BUFFER, GL_DYNAMIC_COPY, NUM_KEYS * sizeof(vec4), NULL);
73  buf_flags = gen_buffer(GL_SHADER_STORAGE_BUFFER, GL_DYNAMIC_COPY, NUM_KEYS * sizeof(GLuint), NULL);
74 
75  // Allocate memory for scan levels. Make sure to properly pad them to a workgroups worth of work.
76  elems = NUM_BLOCKS;
77  for (unsigned i = 0; i < scan_levels; i++)
78  {
79  buf_scan[i] = gen_buffer(GL_SHADER_STORAGE_BUFFER, GL_DYNAMIC_COPY, elems * BLOCK_SIZE * 4 * sizeof(GLuint), NULL);
80  elems = (elems + BLOCK_SIZE - 1) / BLOCK_SIZE;
81  buf_sums[i] = gen_buffer(GL_SHADER_STORAGE_BUFFER, GL_DYNAMIC_COPY, elems * BLOCK_SIZE * 4 * sizeof(GLuint), NULL);
82  }
83 
84  return true;
85 }
86 
87 void sort_free()
88 {
91 
92  for (unsigned i = 0; i < scan_levels; i++)
93  {
94  del_buffer(buf_scan[i]);
95  del_buffer(buf_sums[i]);
96  }
97 
102 }
103 
104 void sort_bits(GLuint buf_input, int bit_offset, vec3 axis, float z_min, float z_max)
105 {
106  // Keep track of which dispatch sizes we used to make the resolve steps simpler.
107  unsigned dispatch_sizes[MAX_SCAN_LEVELS] = {0};
108 
109  unsigned blocks = NUM_BLOCKS;
110 
111  // First pass. Compute 16-bit unsigned depth and apply first pass of scan algorithm.
112  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buf_input);
113  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buf_scan[0]);
114  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buf_sums[0]);
115  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, buf_flags);
117  uniform("bitOffset", bit_offset);
118  uniform("axis", axis);
119  uniform("zMin", z_min);
120  uniform("zMax", z_max);
121  dispatch_sizes[0] = blocks;
122  glDispatchCompute(blocks, 1, 1);
123  glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
124 
125  // If we processed more than one work group of data, we're not done,
126  // so scan buf_sums[0] and keep scanning recursively like this until buf_sums[N] becomes
127  // a single value.
129  for (unsigned i = 1; i < scan_levels; i++)
130  {
131  blocks = (blocks + BLOCK_SIZE - 1) / BLOCK_SIZE;
132  dispatch_sizes[i] = blocks;
133 
134  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buf_sums[i - 1]);
135  // If we only do one work group we don't need to resolve it later,
136  // and we can update the scan buffer inplace.
137  if (blocks <= 1)
138  {
139  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buf_sums[i - 1]);
140  }
141  else
142  {
143  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buf_scan[i]);
144  }
145  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buf_sums[i]);
146 
147  glDispatchCompute(blocks, 1, 1);
148  glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
149  }
150 
151  // Go backwards, we want to end up with a buf_sums[0] which has been properly scanned.
152  // Once we have buf_scan[0] and buf_sums[0], we can do the reordering step.
154  for (unsigned i = scan_levels - 1; i; i--)
155  {
156  if (dispatch_sizes[i] <= 1) // No need to resolve, buf_sums[i - 1] is already correct
157  {
158  continue;
159  }
160 
161  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buf_scan[i]);
162  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buf_sums[i]);
163  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buf_sums[i - 1]);
164  glDispatchCompute(dispatch_sizes[i], 1, 1);
165  glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
166  }
167 
168  // We can now reorder our input properly.
170  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buf_input);
171  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buf_scan[0]);
172  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buf_sums[0]);
173  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, buf_sorted);
174  glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, buf_flags);
175  glDispatchCompute(NUM_BLOCKS, 1, 1);
176  glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
177 
178  // Now we're done :)
179 }
180 
181 void radix_sort(GLuint buf_input, vec3 axis, float z_min, float z_max)
182 {
183  for (uint32_t i = 0; i < 8; i++)
184  {
185  sort_bits(buf_input, i * 2, axis, z_min, z_max);
186 
187  // Swap for the next digit stage
188  // The <buf_input> buffer will in the end hold the latest sorted data
189  std::swap(buf_input, buf_sorted);
190  }
191 
192  // We use the position data to draw the particles afterwards
193  // Thus we need to ensure that the data is up to date
194  glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
195 }
void sort_bits(GLuint buf_input, int bit_offset, vec3 axis, float z_min, float z_max)
Definition: sort.cpp:104
Shader shader_reorder
Definition: sort.cpp:31
bool load_compute_from_file(string cs_path)
Definition: shader.cpp:117
Definition: matrix.h:51
void radix_sort(GLuint buf_input, vec3 axis, float z_min, float z_max)
Definition: sort.cpp:181
GLuint buf_scan[MAX_SCAN_LEVELS]
Definition: sort.cpp:37
void del_buffer(GLuint buffer)
Definition: glutil.cpp:137
Shader shader_scan
Definition: sort.cpp:31
GLuint buf_flags
Definition: sort.cpp:37
const uint32_t NUM_BLOCKS
Definition: sort.h:27
Definition: matrix.h:75
const uint32_t BLOCK_SIZE
Definition: sort.h:25
void dispose()
Definition: shader.cpp:129
#define MAX_SCAN_LEVELS
Definition: sort.cpp:28
bool link()
Definition: shader.cpp:124
GLuint buf_sums[MAX_SCAN_LEVELS]
Definition: sort.cpp:37
const uint32_t NUM_KEYS
Definition: sort.h:26
Shader shader_scan_first
Definition: sort.cpp:31
GLuint buf_sorted
Definition: sort.cpp:37
GLuint gen_buffer(GLenum target, GLenum usage, GLsizei size, const void *data)
Definition: glutil.cpp:122
Definition: shader.h:27
void uniform(string name, const mat4 &v)
Definition: glutil.cpp:97
void sort_free()
Definition: sort.cpp:87
bool sort_init()
Definition: sort.cpp:44
uniform vec3 axis
Definition: scan_first.cs:129
void use_shader(Shader shader)
Definition: glutil.cpp:81
Shader shader_resolve
Definition: sort.cpp:31
unsigned scan_levels
Definition: sort.cpp:42
typedef GLuint(GL_APIENTRYP PFNGLGETDEBUGMESSAGELOGKHRPROC)(GLuint count