OpenGL ES SDK for Android ARM Developer Center
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fftwater.cpp
Go to the documentation of this file.
1 /* Copyright (c) 2015-2017, ARM Limited and Contributors
2  *
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge,
6  * to any person obtaining a copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation the rights to
8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
9  * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
14  * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
17  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19  */
20 
21 #define FFT_FP16 1
22 
23 #include "vector_math.h"
24 
25 #include "fftwater.hpp"
26 #include <cmath>
27 #include <fstream>
28 #include <algorithm>
29 
30 using namespace std;
31 using namespace GLFFT;
32 
34  float amplitude,
35  vec2 wind_velocity,
36  uvec2 resolution,
37  vec2 size,
38  vec2 normalmap_freq_mod)
39  :
40  wind_velocity(wind_velocity),
41  wind_dir(vec_normalize(wind_velocity)),
42  Nx(resolution.x), Nz(resolution.y), size(size), size_normal(size / normalmap_freq_mod)
43 {
44  // Factor in Phillips spectrum.
45  L = vec_dot(wind_velocity, wind_velocity) / G;
46 
47  // Use half-res for displacementmap since it's so low-resolution.
49 
50  distribution.resize(Nx * Nz);
51  distribution_normal.resize(Nx * Nz);
52 
53  // Normalize amplitude a bit based on the heightmap size.
54  amplitude *= 0.3f / sqrt(size.x * size.y);
55 
56  generate_distribution(distribution.data(), size, amplitude, 0.02f);
58  amplitude * sqrt(normalmap_freq_mod.x * normalmap_freq_mod.y), 0.02f);
59 
62 
63  // Check if we can render to FP16, if so, we can do mipmaping of FP16 in fragment instead where appropriate.
64  mipmap_fp16 = common_has_extension("GL_EXT_color_buffer_half_float");
65 
66  init_gl_fft();
67 }
68 
69 static inline int alias(int x, int N)
70 {
71  if (x > N / 2)
72  x -= N;
73  return x;
74 }
75 
76 void FFTWater::downsample_distribution(cfloat *out, const cfloat *in, unsigned rate_log2)
77 {
78  // Pick out the lower frequency samples only which is the same as downsampling "perfectly".
79  unsigned out_width = Nx >> rate_log2;
80  unsigned out_height = Nz >> rate_log2;
81 
82  for (unsigned z = 0; z < out_height; z++)
83  {
84  for (unsigned x = 0; x < out_width; x++)
85  {
86  int alias_x = alias(x, out_width);
87  int alias_z = alias(z, out_height);
88 
89  if (alias_x < 0)
90  {
91  alias_x += Nx;
92  }
93 
94  if (alias_z < 0)
95  {
96  alias_z += Nz;
97  }
98 
99  out[z * out_width + x] = in[alias_z * Nx + alias_x];
100  }
101  }
102 }
103 
105 {
106  // See Tessendorf paper for details.
107  float k_len = vec_length(k);
108  if (k_len == 0.0f)
109  {
110  return 0.0f;
111  }
112 
113  float kL = k_len * L;
114  vec2 k_dir = vec_normalize(k);
115  float kw = vec_dot(k_dir, wind_dir);
116 
117  return
118  pow(kw * kw, 1.0f) * // Directional
119  exp(-1.0 * k_len * k_len * max_l * max_l) * // Suppress small waves at ~max_l.
120  exp(-1.0f / (kL * kL)) *
121  pow(k_len, -4.0f);
122 }
123 
124 void FFTWater::generate_distribution(cfloat *distribution, vec2 size, float amplitude, float max_l)
125 {
126  // Modifier to find spatial frequency.
127  vec2 mod = vec2(2.0f * M_PI) / size;
128 
129  for (unsigned z = 0; z < Nz; z++)
130  {
131  for (unsigned x = 0; x < Nx; x++)
132  {
133  auto &v = distribution[z * Nx + x];
134  vec2 k = mod * vec2(alias(x, Nx), alias(z, Nz));
135 
136  // Gaussian distributed noise with unit variance.
138  v = dist * amplitude * sqrt(0.5f * phillips(k, max_l));
139  }
140  }
141 }
142 
144 {
145  vec2 mod = vec2(2.0f * M_PI) / size;
146  vec2 mod_normal = vec2(2.0f * M_PI) / size_normal;
147 
148  // Generate new FFTs
149  GL_CHECK(glUseProgram(prog_generate_height.get()));
150  GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, distribution_buffer.get()));
151  GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, freq_height.get()));
152  GL_CHECK(glUniform2f(0, mod.x, mod.y));
153  GL_CHECK(glUniform1f(1, time));
154  GL_CHECK(glUniform2ui(2, Nx, Nz));
155  // We only need to generate half the frequencies due to C2R transform.
156  GL_CHECK(glDispatchCompute(Nx / 64, Nz, 1));
157 
158  GL_CHECK(glUseProgram(prog_generate_displacement.get()));
159  GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, distribution_buffer_displacement.get()));
160  GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, freq_displacement.get()));
161  GL_CHECK(glUniform2f(0, mod.x, mod.y));
162  GL_CHECK(glUniform1f(1, time));
163  GL_CHECK(glDispatchCompute((Nx >> displacement_downsample) / 64, (Nz >> displacement_downsample), 1));
164 
165  GL_CHECK(glUseProgram(prog_generate_normal.get()));
166  GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, distribution_buffer_normal.get()));
167  GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, freq_normal.get()));
168  GL_CHECK(glUniform2f(0, mod_normal.x, mod_normal.y));
169  GL_CHECK(glUniform1f(1, time));
170  GL_CHECK(glDispatchCompute(Nx / 64, Nz, 1));
171 
172  // The three compute jobs above are independent so we only need to barrier here.
173  GL_CHECK(glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT));
174 }
175 
177 {
178  // Compute the iFFT
179  // Ping-pong the textures we use so we can run fragment and compute in parallel without triggering lots of extra driver work.
180  texture_index ^= 1;
181  fft_height->process(heightmap[texture_index].get(), freq_height.get());
183  fft_normal->process(normalmap[texture_index].get(), freq_normal.get());
184  GL_CHECK(glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT));
185 }
186 
188 {
189  // Mipmap heightmap in compute.
190  // If we mipmap with fragment, we will have to wait for previous frame to complete rendering first.
191  // This creates a stall where vertex shading will run without any fragment processing active, which is very bad for pipelining.
192  // We also cannot use default mipmapping anyways,
193  // since we want to treat (0, 0) as top-left pixel for heightmap/displacementmap
194  // and apply half-texel offsets as needed.
195  //
196  // While we don't need to mipmap normalmap and gradientjacobian in compute, implement this as a fallback
197  // if FP16 rendering extension is not supported.
198  if (mipmap_fp16)
199  {
200  GL_CHECK(glBindTexture(GL_TEXTURE_2D, gradientjacobianmap[texture_index].get()));
201  GL_CHECK(glGenerateMipmap(GL_TEXTURE_2D));
202  GL_CHECK(glBindTexture(GL_TEXTURE_2D, normalmap[texture_index].get()));
203  GL_CHECK(glGenerateMipmap(GL_TEXTURE_2D));
204  }
205 
206  // Do not output to the two highest mipmap levels.
207  for (unsigned l = 0; (Nx >> l) >= 8 && (Nz >> l) >= 8; l++)
208  {
209  // Fallback path.
210  if (!mipmap_fp16)
211  {
212  // There is no rg16f image format, just use R32UI reinterpretation which is the same thing.
214  Nx >> l, Nz >> l, l + 1);
216  Nx >> l, Nz >> l, l + 1);
217  }
218 
219  compute_mipmap(prog_mipmap_height, heightdisplacementmap[texture_index], GL_RGBA16F, Nx >> l, Nz >> l, l + 1);
220 
221  // Avoid memory barriers for every dispatch since we can compute 3 separate miplevels before flushing load-store caches.
222  GL_CHECK(glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT));
223  }
224 }
225 
227 {
228  update_phase(time);
229  compute_ifft();
230  // Generate final textures ready for vertex and fragment shading.
233 }
234 
236 {
237  GL_CHECK(glUseProgram(prog_bake_height_gradient.get()));
238 
239  GL_CHECK(glActiveTexture(GL_TEXTURE0));
240  GL_CHECK(glBindTexture(GL_TEXTURE_2D, heightmap[texture_index].get()));
241  GL_CHECK(glActiveTexture(GL_TEXTURE1));
242  GL_CHECK(glBindTexture(GL_TEXTURE_2D, displacementmap[texture_index].get()));
243 
244  // Height and displacement are sampled in vertex shaders only, so stick them together.
245  GL_CHECK(glBindImageTexture(0, heightdisplacementmap[texture_index].get(), 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA16F));
246 
247  // Gradients from heightmap and the jacobian are only sampled in fragment, so group them together.
248  GL_CHECK(glBindImageTexture(1, gradientjacobianmap[texture_index].get(), 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA16F));
249 
250  GL_CHECK(glUniform4f(0,
251  1.0f / Nx, 1.0f / Nz,
252  1.0f / (Nx >> displacement_downsample),
253  1.0f / (Nz >> displacement_downsample)));
254  GL_CHECK(glUniform4f(1,
255  Nx / size.x, Nz / size.y,
257  (Nz >> displacement_downsample) / size.y));
258 
259  GL_CHECK(glDispatchCompute(Nx / 8, Nz / 8, 1));
260  GL_CHECK(glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT));
261 }
262 
263 void FFTWater::compute_mipmap(const Program &program, const Texture &texture, GLenum format, unsigned Nx, unsigned Nz, unsigned l)
264 {
265  GL_CHECK(glActiveTexture(GL_TEXTURE0));
266  GL_CHECK(glBindTexture(GL_TEXTURE_2D, texture.get()));
267 
268  GLint min_filter;
269  glGetTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, &min_filter);
270  // Make sure we're not sampling from the level we're trying to write to.
271  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST));
272 
273  GL_CHECK(glUseProgram(program.get()));
274 
275  GL_CHECK(glBindImageTexture(0, texture.get(), l, GL_FALSE, 0, GL_WRITE_ONLY, format));
276  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, l - 1));
277  GL_CHECK(glUniform1i(0, l - 1));
278  GL_CHECK(glUniform2f(1, 1.0f / Nx, 1.0f / Nz));
279 
280  unsigned threads_x = Nx / 2;
281  unsigned threads_z = Nz / 2;
282  GL_CHECK(glDispatchCompute(threads_x / 4, threads_z / 4, 1));
283 
284  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, 1000));
285  GL_CHECK(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, min_filter));
286 }
287 
288 void FFTWater::init_texture(Texture &tex, GLenum format, unsigned levels, unsigned width, unsigned height,
289  GLenum mag_filter, GLenum min_filter)
290 {
291  tex.init(width, height, levels, format, GL_REPEAT, GL_REPEAT, min_filter, mag_filter);
292 }
293 
295 {
296  // Compile compute shaders.
298  prog_generate_displacement = Program(common_compile_compute_shader_from_file("water_generate_displacement.comp"));
300 
305 
306  auto cache = make_shared<ProgramCache>();
307 
308  // Use FP16 FFT.
309  FFTOptions options;
310  options.type.fp16 = FFT_FP16;
311  options.type.input_fp16 = FFT_FP16;
312  options.type.output_fp16 = FFT_FP16;
313 
314  // Sensible default values for Mali.
315  options.performance.workgroup_size_x = 8;
316  options.performance.workgroup_size_y = 4;
317  options.performance.vector_size = 4;
318  options.performance.shared_banked = false;
319 
320  // Create three FFTs for heightmap, displacementmap and high-frequency normals.
321  fft_height = unique_ptr<FFT>(new FFT(Nx, Nz,
322  ComplexToReal, Inverse, SSBO, ImageReal, cache, options));
324  ComplexToComplex, Inverse, SSBO, Image, cache, options));
325  fft_normal = unique_ptr<FFT>(new FFT(Nx, Nz,
326  ComplexToComplex, Inverse, SSBO, Image, move(cache), options));
327 
328  normal_levels = unsigned(log2(max(float(Nx), float(Nz)))) + 1;
329 
330  for (unsigned i = 0; i < 2; i++)
331  {
332  // R32F since GLES 3.1 does not support r16f format for image load/store.
334  GL_R32F, 1,
335  Nx, Nz,
336  GL_NEAREST,
337  GL_NEAREST);
338 
340  GL_RG16F, 1,
342  GL_LINEAR,
343  GL_LINEAR);
344 
345  // Ignore the two highest mipmap levels, since we would like to avoid micro dispatches that just write 1 texel.
346  init_texture(normalmap[i], GL_RG16F, normal_levels - 2, Nx, Nz, GL_LINEAR, GL_LINEAR_MIPMAP_LINEAR);
347 
349  GL_RGBA16F, normal_levels - 2,
350  Nx, Nz,
351  GL_LINEAR,
352  GL_LINEAR_MIPMAP_NEAREST);
353 
355  GL_RGBA16F, normal_levels - 2,
356  Nx, Nz,
357  GL_LINEAR,
358  GL_LINEAR_MIPMAP_LINEAR);
359  }
360 
361  distribution_buffer.init(distribution.data(), Nx * Nz * sizeof(cfloat), GL_STATIC_COPY);
363  (Nx * Nz * sizeof(cfloat)) >> (displacement_downsample * 2),
364  GL_STATIC_COPY);
365  distribution_buffer_normal.init(distribution_normal.data(), Nx * Nz * sizeof(cfloat), GL_STATIC_COPY);
366 
367  distribution.clear();
369  distribution_normal.clear();
370 
371  // Copy distributions to the GPU.
372  freq_height.init(nullptr, (Nx * Nz * sizeof(cfloat)) >> FFT_FP16, GL_STREAM_COPY);
373  freq_normal.init(nullptr, (Nx * Nz * sizeof(cfloat)) >> FFT_FP16, GL_STREAM_COPY);
374  freq_displacement.init(nullptr, ((Nx * Nz * sizeof(cfloat)) >> FFT_FP16) >> (displacement_downsample * 2), GL_STREAM_COPY);
375 }
376 
GLFFT::Buffer freq_height
Definition: fftwater.hpp:83
GLuint get() const
const GLfloat * v
Definition: gl2ext.h:2231
void update_phase(float time)
Definition: fftwater.cpp:143
GLFFT::Program prog_mipmap_height
Definition: fftwater.hpp:65
Inverse FFT transform.
GLFFT::Texture normalmap[2]
Definition: fftwater.hpp:71
#define FFT_FP16
Definition: fftwater.cpp:21
std::normal_distribution< float > normal_dist
Definition: fftwater.hpp:52
unsigned displacement_downsample
Definition: fftwater.hpp:78
GLFFT::Buffer freq_displacement
Definition: fftwater.hpp:84
FFTWater(float amplitude, vec2 wind_velocity, uvec2 resolution, vec2 size, vec2 normalmap_freq_mod)
Definition: fftwater.cpp:33
std::unique_ptr< GLFFT::FFT > fft_displacement
Definition: fftwater.hpp:87
void generate_mipmaps()
Definition: fftwater.cpp:187
std::unique_ptr< GLFFT::FFT > fft_normal
Definition: fftwater.hpp:88
float vec_dot(const T &a, const T &b)
Definition: vector_math.h:289
void init_texture(GLFFT::Texture &tex, GLenum format, unsigned levels, unsigned width, unsigned height, GLenum mag_filter, GLenum min_filter)
Definition: fftwater.cpp:288
Definition: Native.cpp:67
GLint GLsizei GLsizei height
Definition: gl2ext.h:179
Definition: matrix.h:28
vec2 size
Definition: fftwater.hpp:42
GLuint common_compile_compute_shader_from_file(const char *cs_source)
Definition: common.cpp:317
cfloat phillips(vec2 k, float max_l)
Definition: fftwater.cpp:104
vec2 wind_dir
Definition: fftwater.hpp:40
Complex-to-real transform. N / 2 + 1 complex values are used per row with a stride of N complex sampl...
std::unique_ptr< GLFFT::FFT > fft_height
Definition: fftwater.hpp:86
GLint GLsizei GLsizei GLenum format
Definition: gl2ext.h:179
void compute_ifft()
Definition: fftwater.cpp:176
void init_gl_fft()
Definition: fftwater.cpp:294
vec2 size_normal
Definition: fftwater.hpp:42
GLFFT::Texture displacementmap[2]
Definition: fftwater.hpp:70
bool fp16
Whether internal shader should be mediump float.
GLFFT::Texture heightmap[2]
Definition: fftwater.hpp:69
unsigned Nx
Definition: fftwater.hpp:41
void downsample_distribution(cfloat *out, const cfloat *in, unsigned rate_log2)
Definition: fftwater.cpp:76
GLFFT::Program prog_mipmap_gradient_jacobian
Definition: fftwater.hpp:67
float vec_length(const T &vec)
Definition: vector_math.h:298
unsigned normal_levels
Definition: fftwater.hpp:77
std::vector< cfloat > distribution_normal
Definition: fftwater.hpp:58
unsigned texture_index
Definition: fftwater.hpp:76
GLFFT::Program prog_generate_displacement
Definition: fftwater.hpp:62
GLuint get() const
Options for FFT implementation. Defaults for performance as conservative.
float L
Definition: fftwater.hpp:43
void init(const void *data, size_t size, GLenum access)
GLfloat GLfloat f
Definition: gl2ext.h:2707
GLFFT::Buffer freq_normal
Definition: fftwater.hpp:85
void generate_distribution(cfloat *distribution, vec2 size, float amplitude, float max_l)
Definition: fftwater.cpp:124
float y
Definition: matrix.h:31
static int alias(int x, int N)
Definition: fftwater.cpp:69
#define M_PI
The value of pi.
Definition: Mathematics.h:37
bool common_has_extension(const char *ext)
Definition: common.hpp:57
#define GL_CHECK(x)
Definition: AstcTextures.h:59
GLFFT::Texture heightdisplacementmap[2]
Definition: fftwater.hpp:73
GL_SHADER_STORAGE_BUFFER.
GLsizei levels
Definition: gl2ext.h:1816
GLFFT::Buffer distribution_buffer_displacement
Definition: fftwater.hpp:81
Functions for working with textures.
Definition: Texture.h:39
GLenum GLuint texture
Definition: gl2ext.h:385
float max(float x, float y)
Definition: noise.cpp:29
Regular complex-to-complex transform.
GLFFT::Program prog_generate_normal
Definition: fftwater.hpp:61
GLenum GLuint GLintptr GLsizeiptr size
Definition: gl2ext.h:629
GLFFT::Buffer distribution_buffer
Definition: fftwater.hpp:80
GLFFT::Program prog_mipmap_normal
Definition: fftwater.hpp:66
GLint GLint GLint GLint GLint x
Definition: gl2ext.h:574
unsigned Nz
Definition: fftwater.hpp:41
bool mipmap_fp16
Definition: fftwater.hpp:94
T vec_normalize(const T &vec)
Definition: vector_math.h:304
std::vector< cfloat > distribution
Definition: fftwater.hpp:56
static constexpr float G
Definition: fftwater.hpp:54
GLint GLsizei width
Definition: gl2ext.h:179
std::default_random_engine engine
Definition: fftwater.hpp:53
float x
Definition: matrix.h:30
void bake_height_gradient()
Definition: fftwater.cpp:235
GLFFT::Buffer distribution_buffer_normal
Definition: fftwater.hpp:82
typedef GLenum(GL_APIENTRYP PFNGLGETGRAPHICSRESETSTATUSKHRPROC)(void)
GLFFT::Texture gradientjacobianmap[2]
Definition: fftwater.hpp:74
GLuint program
Definition: gl2ext.h:1475
void compute_mipmap(const GLFFT::Program &program, const GLFFT::Texture &texture, GLenum format, unsigned Nx, unsigned Nz, unsigned level)
Definition: fftwater.cpp:263
GLFFT::Program prog_generate_height
Definition: fftwater.hpp:60
std::complex< float > cfloat
Definition: fftwater.hpp:32
#define N
Definition: geometry.cpp:25
struct GLFFT::FFTOptions::Type type
std::vector< cfloat > distribution_displacement
Definition: fftwater.hpp:57
GLFFT::Program prog_bake_height_gradient
Definition: fftwater.hpp:64
GLint y
Definition: gl2ext.h:179
void update(float time)
Definition: fftwater.cpp:226
uniform float time
Definition: spawn.cs:50