OpenGL ES SDK for Android ARM Developer Center
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
hiz_cull.cs
Go to the documentation of this file.
1 #version 310 es
2 
3 /* Copyright (c) 2014-2017, ARM Limited and Contributors
4  *
5  * SPDX-License-Identifier: MIT
6  *
7  * Permission is hereby granted, free of charge,
8  * to any person obtaining a copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation the rights to
10  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
11  * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16  * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 // The Hi-Z occlusion culling algorithm.
24 //
25 // First, we perform a frustum test of the instance; if that fails, we return early.
26 // If it passes frustum test, we compute the screen space bounding box.
27 //
28 // We then test if the instance intersects with the near plane.
29 // If so, we cannot safely find screen space bounding box, and we simply assume the instance is visible.
30 //
31 // Based on the bounding box, we compute a LOD (mip level) such that the texel quad read by one PCF shadow lookup
32 // covers the entire bounding box.
33 //
34 // If the shadow sampling returns > 0.0, at least one texel in the quad must have compared to 1.0,
35 // and we must assume the instance is visible.
36 
// Default precision qualifiers: request highp for floats, ints and the shadow sampler type used below.
37 precision highp float;
38 precision highp int;
39 precision highp sampler2DShadow;
40 
// Work group size: 64 invocations along X. main() uses gl_GlobalInvocationID.x as the instance index,
// so each invocation handles at most one instance.
41 layout(local_size_x = 64) in;
42 
// Camera data consumed by the culling pass (std140 layout, uniform buffer binding 0).
43 layout(binding = 0, std140) uniform UBO
44 {
45  mat4 uVP; // View-projection matrix. Not referenced by the functions visible in this file.
46  mat4 uView; // View matrix. main() uses it to transform the sphere center into view space.
47  vec4 uProj[4]; // Projection matrix as four vec4s. main() reads only [0][0], [1][1], [2].zw and [3].zw.
48  vec4 uFrustum[6]; // Frustum planes for frustum test. Each plane is dotted with vec4(center, 1.0).
49  vec2 zNearFar; // NearFar values for near plane intersection test. Only .x is read by main().
50 };
51 
// Number of instances to test; main() returns immediately for invocation indices at or beyond this value.
52 layout(location = 0) uniform uint uNumBoundingBoxes;
// Depth texture read through a shadow (depth-comparison) sampler; main() samples it with textureLod()
// at an explicitly computed LOD. NOTE(review): presumably this holds the Hi-Z depth mip chain — confirm against the host code.
53 layout(binding = 0) uniform sampler2DShadow uDepth;
54 
55 // Atomic counters for each LOD level.
// append_instance() increments exactly one of these per accepted instance and uses the value returned by
// atomicCounterIncrement() as the write index into the output buffer of the same LOD.
56 // The offset for instanceCount is already applied via glBindBufferRange().
57 layout(binding = 0, offset = 0) uniform atomic_uint instanceCountLOD0;
58 layout(binding = 1, offset = 0) uniform atomic_uint instanceCountLOD1;
59 layout(binding = 2, offset = 0) uniform atomic_uint instanceCountLOD2;
60 layout(binding = 3, offset = 0) uniform atomic_uint instanceCountLOD3;
61 
// Per-instance record stored in the input buffer.
// We only care about position: xyz is the sphere center and w is its radius (see main()).
// velocity is part of the record but is not read by this shader.
struct SphereInstance
{
    vec4 position;
    vec4 velocity;
};
68 
// Read-only array of candidate instances (std430, storage buffer binding 0).
// Indexed by gl_GlobalInvocationID.x in main() and append_instance().
layout(std430, binding = 0) buffer PerInstanceInput
{
    readonly SphereInstance data[];
} input_instance;
73 
// We only need position as instance data.
// Output list for LOD0 (storage buffer binding 1); append_instance() writes here when minz < 0.8,
// using instanceCountLOD0 as the write index.
layout(std430, binding = 1) buffer PerInstanceOutputLOD0
{
    writeonly vec4 data[];
} output_instance_lod0;
79 
// Output list for LOD1 (storage buffer binding 2); append_instance() writes here when 0.8 <= minz < 0.9,
// using instanceCountLOD1 as the write index.
layout(std430, binding = 2) buffer PerInstanceOutputLOD1
{
    writeonly vec4 data[];
} output_instance_lod1;
84 
// Output list for LOD2 (storage buffer binding 3); append_instance() writes here when 0.9 <= minz < 0.95,
// using instanceCountLOD2 as the write index.
layout(std430, binding = 3) buffer PerInstanceOutputLOD2
{
    writeonly vec4 data[];
} output_instance_lod2;
89 
// Output list for LOD3 (storage buffer binding 4); append_instance() writes here for every minz
// not accepted by the LOD0-LOD2 thresholds, using instanceCountLOD3 as the write index.
layout(std430, binding = 4) buffer PerInstanceOutputLOD3
{
    writeonly vec4 data[];
} output_instance_lod3;
94 
// Appends the instance handled by this invocation to one of the four per-LOD output buffers.
//
// minz is a non-linear depth value. It selects the bucket by fixed thresholds:
//   minz < 0.8 -> LOD0, minz < 0.9 -> LOD1, minz < 0.95 -> LOD2, anything else -> LOD3.
// The slot inside the chosen buffer comes from that LOD's atomic counter.
void append_instance(float minz)
{
    // The same position record is written whichever bucket is picked.
    vec4 sphere = input_instance.data[gl_GlobalInvocationID.x].position;

    if (minz < 0.8)
    {
        uint slot = atomicCounterIncrement(instanceCountLOD0);
        output_instance_lod0.data[slot] = sphere;
        return;
    }

    if (minz < 0.9)
    {
        uint slot = atomicCounterIncrement(instanceCountLOD1);
        output_instance_lod1.data[slot] = sphere;
        return;
    }

    if (minz < 0.95)
    {
        uint slot = atomicCounterIncrement(instanceCountLOD2);
        output_instance_lod2.data[slot] = sphere;
        return;
    }

    uint slot = atomicCounterIncrement(instanceCountLOD3);
    output_instance_lod3.data[slot] = sphere;
}
119 
// Sphere-versus-frustum rejection test.
//
// Evaluates each of the six uFrustum plane equations at the sphere center.
// Returns false if any result is below -radius (the bounding sphere is not inside the frustum
// and can safely be culled); returns true otherwise.
bool frustum_test(vec3 center, float radius)
{
    vec4 homogeneous_center = vec4(center, 1.0);
    bool inside = true;

    for (int plane = 0; plane < 6; plane++)
    {
        if (dot(uFrustum[plane], homogeneous_center) < -radius)
        {
            inside = false;
            break;
        }
    }

    return inside;
}
131 
// Per-instance culling entry point; one invocation handles one bounding sphere.
//
// Steps:
//   1. Skip invocations beyond uNumBoundingBoxes.
//   2. Cull spheres that fail frustum_test().
//   3. Spheres touching the near plane are assumed visible and appended with depth 0.0.
//   4. Otherwise, compute the screen-space bounding box, pick a LOD whose PCF footprint covers it,
//      and append the instance only if the depth comparison reports any visible texel.
void main()
{
    uint instance_index = gl_GlobalInvocationID.x;
    if (instance_index >= uNumBoundingBoxes)
        return;

    // position.xyz is the sphere center, position.w its radius.
    vec4 sphere = input_instance.data[instance_index].position;
    vec3 center = sphere.xyz;
    float radius = sphere.w;

    // Outside the frustum: nothing to append.
    bool in_frustum = frustum_test(center, radius);
    if (!in_frustum)
        return;

    // Move to view space. Camera is pointing down the -Z axis, so the point of the sphere
    // nearest to the camera has the largest Z.
    vec3 view_center = (uView * vec4(center, 1.0)).xyz;
    float view_nearest_z = view_center.z + radius;

    // Sphere clips against the near plane: the screen-space box cannot be found safely,
    // so just assume visibility.
    if (view_nearest_z >= -zNearFar.x)
    {
        append_instance(0.0);
        return;
    }

    // Find the screen-space bounding box. See the SDK documentation for the algorithm in more detail.
    //
    // The sphere is projected onto the horizontal (XZ) and vertical (YZ) planes, giving a circle in 2D.
    // In each plane we find the two tangent lines from the camera to the circle.
    //
    // The tangent length follows from the Pythagorean theorem:
    //   length_tangent = sqrt(length_center^2 - radius^2).
    // From it we build a 2D rotation and rotate the center direction in both directions to get
    // both tangent points directly. A perspective divide then yields min/max per axis.

    vec2 horiz_dir = view_center.xz;
    vec2 vert_dir = view_center.yz;
    // x: distance to the center in the XZ plane, y: distance to the center in the YZ plane.
    vec2 center_dist = vec2(length(horiz_dir), length(vert_dir));
    horiz_dir = horiz_dir / center_dist.x;
    vert_dir = vert_dir / center_dist.y;

    vec2 tangent_len = sqrt(center_dist * center_dist - radius * radius);
    // xy: tangent_len / center_dist (cosine terms), zw: radius / center_dist (sine terms).
    vec4 cos_sin = vec4(tangent_len, radius, radius) / center_dist.xyxy;

    // The two rotation matrices differ only in the sign of sin(), so the products are shared
    // and combined with +/- below.
    vec4 horiz_terms = horiz_dir.xyyx * tangent_len.x * vec4(cos_sin.xx, -cos_sin.z, cos_sin.z);
    vec4 vert_terms = vert_dir.xyyx * tangent_len.y * vec4(cos_sin.yy, -cos_sin.w, cos_sin.w);

    vec2 horiz_a = horiz_terms.xy + horiz_terms.zw;
    vec2 horiz_b = horiz_terms.xy - horiz_terms.zw;
    vec2 vert_a = vert_terms.xy + vert_terms.zw;
    vec2 vert_b = vert_terms.xy - vert_terms.zw;

    // This assumes the projection matrix doesn't do translations or any other transforms first.
    // The result is remapped from [-1, 1] to [0, 1] texture coordinates.
    vec4 proj_scale = vec4(uProj[0].xx, uProj[1].yy);
    vec4 tangent_lateral = vec4(horiz_a.x, horiz_b.x, vert_a.x, vert_b.x);
    vec4 tangent_depth = vec4(horiz_a.y, horiz_b.y, vert_a.y, vert_b.y);
    vec4 projected = -0.5 * proj_scale * tangent_lateral / tangent_depth + 0.5;

    // The rotation direction is known, so which tangent point is min and which is max is known too.
    vec2 box_min = projected.yw;
    vec2 box_max = projected.xz;

    // Project the nearest view-space Z and remap it to a [0, 1] depth reference value.
    vec2 clip_zw = mat2(uProj[2].zw, uProj[3].zw) * vec2(view_nearest_z, 1.0);
    float depth_ref = 0.5 * clip_zw.x / clip_zw.y + 0.5;

    // Compute the LOD required for the shadow lookup from the box extent in level-0 texels (at least 1).
    vec2 extent_texels = (box_max - box_min) * vec2(textureSize(uDepth, 0));
    float largest_extent = max(max(extent_texels.x, extent_texels.y), 1.0);
    float lod = ceil(log2(largest_extent));

    vec2 box_center = 0.5 * (box_max + box_min);

    // Test visibility: any result above 0.0 means at least one compared texel passed.
    float visibility = textureLod(uDepth, vec3(box_center, depth_ref), lod);
    if (visibility > 0.0)
        append_instance(depth_ref);
}
208 
vec4 velocity
Definition: hiz_cull.cs:66
void main()
Definition: hiz_cull.cs:132
Definition: matrix.h:51
layout(local_size_x=64) in
output_instance_lod0
Definition: hiz_cull.cs:78
Definition: matrix.h:28
GLenum GLuint GLintptr offset
Definition: gl2ext.h:629
precision highp int
Definition: hiz_cull.cs:38
bool frustum_test(vec3 center, float radius)
Definition: hiz_cull.cs:120
output_instance_lod1
Definition: hiz_cull.cs:83
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
Definition: gl2ext.h:179
Definition: matrix.h:75
GLenum GLenum GLsizei count
Definition: gl2ext.h:133
output_instance_lod3
Definition: hiz_cull.cs:93
vec4 position
Definition: hiz_cull.cs:65
void append_instance(float minz)
Definition: hiz_cull.cs:95
GLfloat GLfloat GLfloat w
Definition: gl2ext.h:2701
GLint location
Definition: gl2ext.h:180
output_instance_lod2
Definition: hiz_cull.cs:88
GLfloat GLfloat f
Definition: gl2ext.h:2707
float y
Definition: matrix.h:31
float w
Definition: matrix.h:80
float max(float x, float y)
Definition: noise.cpp:29
void uniform(string name, const mat4 &v)
Definition: glutil.cpp:97
GLint GLint GLint GLint GLint x
Definition: gl2ext.h:574
GLenum GLuint GLenum GLsizei length
Definition: gl2ext.h:134
precision highp sampler2DShadow
Definition: hiz_cull.cs:39
Definition: matrix.h:104
GLenum GLuint buffer
Definition: gl2ext.h:628
float z
Definition: matrix.h:79
precision highp float
Definition: hiz_cull.cs:37
input_instance
Definition: hiz_cull.cs:72
float x
Definition: matrix.h:30