OpenGL ES SDK for Android ARM Developer Center
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
hizculling.cpp
Go to the documentation of this file.
1 /* Copyright (c) 2014-2017, ARM Limited and Contributors
2  *
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge,
6  * to any person obtaining a copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation the rights to
8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
9  * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
14  * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19  */
20 
21 #include "culling.hpp"
22 #include <string.h>
23 
24 using namespace std;
25 
26 #define GROUP_SIZE_AABB 64
27 
29 {
30  culling_program = common_compile_compute_shader_from_file("hiz_cull.cs");
31  init();
32 }
33 
35 {
36  culling_program = common_compile_compute_shader_from_file(program);
37  init();
38 }
39 
41 {
42  // Blank fragment shader that only renders depth.
43  depth_render_program = common_compile_shader_from_file("depth.vs", "depth.fs");
44 
45  // Shader for manually mipmapping a depth texture.
46  depth_mip_program = common_compile_shader_from_file("quad.vs", "depth_mip.fs");
47 
48  lod_levels = DEPTH_SIZE_LOG2 + 1;
49 
50  GL_CHECK(glGenTextures(1, &depth_texture));
51  GL_CHECK(glBindTexture(GL_TEXTURE_2D, depth_texture));
52  GL_CHECK(glTexStorage2D(GL_TEXTURE_2D, lod_levels, GL_DEPTH24_STENCIL8,
54 
55  // We cannot do filtering on depth textures unless we're doing shadow compare (PCF).
56  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST));
57  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST_MIPMAP_NEAREST));
58 
59  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));
60  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));
61 
62  // Useful for debugging purposes so depth shows up as graytone and not just red.
63  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_R, GL_RED));
64  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_G, GL_RED));
65  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_B, GL_RED));
66  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_A, GL_ONE));
67  GL_CHECK(glBindTexture(GL_TEXTURE_2D, 0));
68 
69  // Create FBO chain for each miplevel.
70  framebuffers.resize(lod_levels);
71  GL_CHECK(glGenFramebuffers(lod_levels, &framebuffers[0]));
72  for (unsigned i = 0; i < lod_levels; i++)
73  {
74  GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[i]));
75  GL_CHECK(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
76  GL_TEXTURE_2D, depth_texture, i));
77 
78  GL_CHECK(GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER));
79  if (status != GL_FRAMEBUFFER_COMPLETE)
80  {
81  LOGE("Framebuffer for LOD %u is incomplete!", i);
82  }
83  }
84  GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, 0));
85 
86  GL_CHECK(glGenBuffers(1, &occluder.vertex));
87  GL_CHECK(glGenBuffers(1, &occluder.index));
88  GL_CHECK(glGenVertexArrays(1, &occluder.vao));
89 
90  // Sampler object that is used during occlusion culling.
91  // We want GL_LINEAR shadow mode (PCF), but no filtering between miplevels as we manually specify the miplevel in the compute shader.
92  GL_CHECK(glGenSamplers(1, &shadow_sampler));
93  GL_CHECK(glSamplerParameteri(shadow_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST));
94  GL_CHECK(glSamplerParameteri(shadow_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR));
95  GL_CHECK(glSamplerParameteri(shadow_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));
96  GL_CHECK(glSamplerParameteri(shadow_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));
97  GL_CHECK(glSamplerParameteri(shadow_sampler, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE));
98  GL_CHECK(glSamplerParameteri(shadow_sampler, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL));
99 
100  GL_CHECK(glGenBuffers(1, &uniform_buffer));
101  GL_CHECK(glBindBuffer(GL_UNIFORM_BUFFER, uniform_buffer));
102  GL_CHECK(glBufferData(GL_UNIFORM_BUFFER, sizeof(Uniforms), NULL, GL_STREAM_DRAW));
103 }
104 
105 void HiZCulling::test_bounding_boxes(GLuint counter_buffer, const unsigned *counter_offsets, unsigned num_offsets,
106  const GLuint *culled_instance_buffer, GLuint instance_data_buffer,
107  unsigned num_instances)
108 {
109  GL_CHECK(glUseProgram(culling_program));
110 
111  // Update uniform buffer.
112  GL_CHECK(glBindBuffer(GL_UNIFORM_BUFFER, uniform_buffer));
113  GL_CHECK(glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(Uniforms), &uniforms));
114  GL_CHECK(glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer));
115 
116  // Round up number of work groups.
117  // The few extra threads we spawn terminate immediately due to check against num_instances.
118  unsigned aabb_groups = (num_instances + GROUP_SIZE_AABB - 1) / GROUP_SIZE_AABB;
119  GL_CHECK(glProgramUniform1ui(culling_program, 0, num_instances));
120 
121  for (unsigned i = 0; i < num_offsets; i++)
122  {
123  GL_CHECK(glBindBufferRange(GL_ATOMIC_COUNTER_BUFFER, i, counter_buffer, counter_offsets[i], sizeof(uint32_t)));
124  GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1 + i, culled_instance_buffer[i]));
125  }
126 
127  // Bind Hi-Z depth map.
128  GL_CHECK(glActiveTexture(GL_TEXTURE0));
129  GL_CHECK(glBindTexture(GL_TEXTURE_2D, depth_texture));
130  GL_CHECK(glBindSampler(0, shadow_sampler));
131 
132  // Dispatch occlusion culling job.
133  GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, instance_data_buffer));
134  GL_CHECK(glDispatchCompute(aabb_groups, 1, 1));
135 
136  GL_CHECK(glBindSampler(0, 0));
137 
138  // We have updated instance buffer and indirect draw buffer. Memory barrier here to ensure visibility.
139  GL_CHECK(glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_COMMAND_BARRIER_BIT));
140 }
141 
142 void HiZCulling::setup_occluder_geometry(const vector<vec4> &position, const vector<uint32_t> &indices)
143 {
144  // Upload occlusion geometry to GPU. This should be mostly static.
145  GL_CHECK(glBindVertexArray(occluder.vao));
146 
147  GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, occluder.vertex));
148  GL_CHECK(glBufferData(GL_ARRAY_BUFFER, position.size() * sizeof(vec4), &position[0], GL_STATIC_DRAW));
149 
150  GL_CHECK(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, occluder.index));
151  GL_CHECK(glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.size() * sizeof(uint32_t), &indices[0], GL_STATIC_DRAW));
152 
153  GL_CHECK(glEnableVertexAttribArray(0));
154  GL_CHECK(glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, 0));
155 
156  GL_CHECK(glBindVertexArray(0));
157  GL_CHECK(glBindBuffer(GL_ARRAY_BUFFER, 0));
158  GL_CHECK(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0));
159 
160  occluder.elements = indices.size();
161 }
162 
164 {
165  GL_CHECK(glBindTexture(GL_TEXTURE_2D, 0));
166  GL_CHECK(glEnable(GL_DEPTH_TEST));
167 
168  GL_CHECK(glUseProgram(depth_render_program));
169  GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[0]));
170 
171  // Render occlusion geometry to miplevel 0.
172  GL_CHECK(glBindVertexArray(occluder.vao));
173  GL_CHECK(glViewport(0, 0, DEPTH_SIZE, DEPTH_SIZE));
174  GL_CHECK(glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT));
175  GL_CHECK(glDrawElements(GL_TRIANGLES, occluder.elements, GL_UNSIGNED_INT, 0));
176 
177  GL_CHECK(glBindVertexArray(quad.get_vertex_array()));
178  GL_CHECK(glBindTexture(GL_TEXTURE_2D, depth_texture));
179  GL_CHECK(glUseProgram(depth_mip_program));
180 
181  for (unsigned lod = 1; lod < lod_levels; lod++)
182  {
183  GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[lod]));
184  GL_CHECK(glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT));
185  GL_CHECK(glViewport(0, 0, DEPTH_SIZE >> lod, DEPTH_SIZE >> lod));
186 
187  // Need to do this to ensure that we cannot possibly read from the miplevel we are rendering to.
188  // Otherwise, we have undefined behavior.
189  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, lod - 1));
190  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, lod - 1));
191 
192  // Mipmap.
193  GL_CHECK(glDrawElements(GL_TRIANGLES, quad.get_num_elements(), GL_UNSIGNED_SHORT, 0));
194  }
195 
196  // Restore miplevels. MAX_LEVEL will be clamped accordingly.
197  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0));
198  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1000));
199  GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, 0));
200 }
201 
202 void HiZCulling::set_view_projection(const mat4 &projection, const mat4 &view, const vec2 &zNearFar)
203 {
204  mat4 view_projection = projection * view;
205  GL_CHECK(glProgramUniformMatrix4fv(depth_render_program, 0, 1, GL_FALSE, value_ptr(view_projection)));
206 
207  uniforms.uVP = view_projection;
208  uniforms.uView = view;
209  uniforms.uProj = projection;
210  uniforms.zNearFar = zNearFar;
211 
212  // Compute the 6 frustum planes for frustum culling.
213  compute_frustum_from_view_projection(uniforms.planes, view_projection);
214 }
215 
217 {
218  GL_CHECK(glDeleteTextures(1, &depth_texture));
219  GL_CHECK(glDeleteProgram(depth_render_program));
220  GL_CHECK(glDeleteProgram(depth_mip_program));
221  GL_CHECK(glDeleteProgram(culling_program));
222  GL_CHECK(glDeleteFramebuffers(framebuffers.size(), &framebuffers[0]));
223 
224  GL_CHECK(glDeleteBuffers(1, &occluder.vertex));
225  GL_CHECK(glDeleteBuffers(1, &occluder.index));
226  GL_CHECK(glDeleteBuffers(1, &uniform_buffer));
227  GL_CHECK(glDeleteVertexArrays(1, &occluder.vao));
228 
229  GL_CHECK(glDeleteSamplers(1, &shadow_sampler));
230 }
231 
#define DEPTH_SIZE_LOG2
Definition: culling.hpp:70
Definition: matrix.h:28
GLuint common_compile_compute_shader_from_file(const char *cs_source)
Definition: common.cpp:317
#define GROUP_SIZE_AABB
Definition: hizculling.cpp:26
GLuint common_compile_shader_from_file(const char *vs_source, const char *fs_source)
Definition: common.cpp:241
const T::data_type * value_ptr(const T &vec)
Definition: vector_math.h:35
Matrix projection
Definition: matrix.h:75
GLsizei GLenum const void * indices
Definition: gl2ext.h:322
void setup_occluder_geometry(const std::vector< vec4 > &positions, const std::vector< uint32_t > &indices)
Definition: hizculling.cpp:142
Mesh quad
Definition: app.cpp:40
#define DEPTH_SIZE
Definition: culling.hpp:69
void set_view_projection(const mat4 &projection, const mat4 &view, const vec2 &zNearFar)
Definition: hizculling.cpp:202
#define GL_CHECK(x)
Definition: AstcTextures.h:59
void rasterize_occluders()
Definition: hizculling.cpp:163
Definition: matrix.h:104
void test_bounding_boxes(GLuint counter_buffer, const unsigned *counter_offsets, unsigned num_offsets, const GLuint *culled_instance_buffer, GLuint instance_data_buffer, unsigned num_instances)
Definition: hizculling.cpp:105
void init()
Definition: hizculling.cpp:40
#define LOGE(...)
Definition: AstcTextures.h:30
typedef GLenum(GL_APIENTRYP PFNGLGETGRAPHICSRESETSTATUSKHRPROC)(void)
GLuint program
Definition: gl2ext.h:1475
typedef GLuint(GL_APIENTRYP PFNGLGETDEBUGMESSAGELOGKHRPROC)(GLuint count