|
| 1 | +// Bloom works by creating an intermediate texture with a bunch of mip levels, each half the size of the previous. |
| 2 | +// Each mip is then downsampled into the next lower-resolution mip, in order, starting with the original texture. |
| 3 | +// Finally, each mip is upsampled and blended into the next higher-resolution mip, starting from the smallest mip and ending on the original texture. |
| 4 | +// |
| 5 | +// References: |
| 6 | +// * [COD] - Next Generation Post Processing in Call of Duty - http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare |
| 7 | +// * [PBB] - Physically Based Bloom - https://learnopengl.com/Guest-Articles/2022/Phys.-Based-Bloom |
| 8 | + |
1 | 9 | #import bevy_core_pipeline::fullscreen_vertex_shader |
2 | 10 |
|
3 | 11 | struct BloomUniforms { |
4 | | - threshold: f32, |
5 | | - knee: f32, |
6 | | - scale: f32, |
7 | | - intensity: f32, |
| 12 | + threshold_precomputations: vec4<f32>, |
8 | 13 | viewport: vec4<f32>, |
| 14 | + aspect: f32, |
9 | 15 | }; |
10 | 16 |
|
11 | 17 | @group(0) @binding(0) |
12 | | -var original: texture_2d<f32>; |
| 18 | +var input_texture: texture_2d<f32>; |
13 | 19 | @group(0) @binding(1) |
14 | | -var original_sampler: sampler; |
| 20 | +var s: sampler; |
| 21 | + |
15 | 22 | @group(0) @binding(2) |
16 | 23 | var<uniform> uniforms: BloomUniforms; |
17 | | -@group(0) @binding(3) |
18 | | -var up: texture_2d<f32>; |
19 | 24 |
|
20 | | -fn quadratic_threshold(color: vec4<f32>, threshold: f32, curve: vec3<f32>) -> vec4<f32> { |
21 | | - let br = max(max(color.r, color.g), color.b); |
22 | | - |
23 | | - var rq: f32 = clamp(br - curve.x, 0.0, curve.y); |
24 | | - rq = curve.z * rq * rq; |
25 | | - |
26 | | - return color * max(rq, br - threshold) / max(br, 0.0001); |
| 25 | +#ifdef FIRST_DOWNSAMPLE |
| 26 | +// https://catlikecoding.com/unity/tutorials/advanced-rendering/bloom/#3.4 |
| 27 | +fn soft_threshold(color: vec3<f32>) -> vec3<f32> { |
| 28 | + let brightness = max(color.r, max(color.g, color.b)); |
| 29 | + var softness = brightness - uniforms.threshold_precomputations.y; |
| 30 | + softness = clamp(softness, 0.0, uniforms.threshold_precomputations.z); |
| 31 | + softness = softness * softness * uniforms.threshold_precomputations.w; |
| 32 | + var contribution = max(brightness - uniforms.threshold_precomputations.x, softness); |
| 33 | + contribution /= max(brightness, 0.00001); // Prevent division by 0 |
| 34 | + return color * contribution; |
27 | 35 | } |
| 36 | +#endif |
28 | 37 |
|
29 | | -// Samples original around the supplied uv using a filter. |
30 | | -// |
31 | | -// o o o |
32 | | -// o o |
33 | | -// o o o |
34 | | -// o o |
35 | | -// o o o |
36 | | -// |
37 | | -// This is used because it has a number of advantages that |
38 | | -// outweigh the cost of 13 samples that basically boil down |
39 | | -// to it looking better. |
40 | | -// |
41 | | -// These advantages are outlined in a youtube video by the Cherno: |
42 | | -// https://www.youtube.com/watch?v=tI70-HIc5ro |
43 | | -fn sample_13_tap(uv: vec2<f32>, scale: vec2<f32>) -> vec4<f32> { |
44 | | - let a = textureSample(original, original_sampler, uv + vec2<f32>(-1.0, -1.0) * scale); |
45 | | - let b = textureSample(original, original_sampler, uv + vec2<f32>(0.0, -1.0) * scale); |
46 | | - let c = textureSample(original, original_sampler, uv + vec2<f32>(1.0, -1.0) * scale); |
47 | | - let d = textureSample(original, original_sampler, uv + vec2<f32>(-0.5, -0.5) * scale); |
48 | | - let e = textureSample(original, original_sampler, uv + vec2<f32>(0.5, -0.5) * scale); |
49 | | - let f = textureSample(original, original_sampler, uv + vec2<f32>(-1.0, 0.0) * scale); |
50 | | - let g = textureSample(original, original_sampler, uv + vec2<f32>(0.0, 0.0) * scale); |
51 | | - let h = textureSample(original, original_sampler, uv + vec2<f32>(1.0, 0.0) * scale); |
52 | | - let i = textureSample(original, original_sampler, uv + vec2<f32>(-0.5, 0.5) * scale); |
53 | | - let j = textureSample(original, original_sampler, uv + vec2<f32>(0.5, 0.5) * scale); |
54 | | - let k = textureSample(original, original_sampler, uv + vec2<f32>(-1.0, 1.0) * scale); |
55 | | - let l = textureSample(original, original_sampler, uv + vec2<f32>(0.0, 1.0) * scale); |
56 | | - let m = textureSample(original, original_sampler, uv + vec2<f32>(1.0, 1.0) * scale); |
57 | | - |
58 | | - let div = (1.0 / 4.0) * vec2<f32>(0.5, 0.125); |
59 | | - |
60 | | - var o: vec4<f32> = (d + e + i + j) * div.x; |
61 | | - o = o + (a + b + g + f) * div.y; |
62 | | - o = o + (b + c + h + g) * div.y; |
63 | | - o = o + (f + g + l + k) * div.y; |
64 | | - o = o + (g + h + m + l) * div.y; |
65 | | - |
66 | | - return o; |
| 38 | +// Luminance coefficients from Rec. 709. |
| 39 | +// https://en.wikipedia.org/wiki/Rec._709 |
| 40 | +fn tonemapping_luminance(v: vec3<f32>) -> f32 { |
| 41 | + return dot(v, vec3<f32>(0.2126, 0.7152, 0.0722)); |
67 | 42 | } |
68 | 43 |
|
69 | | -// Samples original using a 3x3 tent filter. |
70 | | -// |
71 | | -// NOTE: Use a 2x2 filter for better perf, but 3x3 looks better. |
72 | | -fn sample_original_3x3_tent(uv: vec2<f32>, scale: vec2<f32>) -> vec4<f32> { |
73 | | - let d = vec4<f32>(1.0, 1.0, -1.0, 0.0); |
74 | | - |
75 | | - var s: vec4<f32> = textureSample(original, original_sampler, uv - d.xy * scale); |
76 | | - s = s + textureSample(original, original_sampler, uv - d.wy * scale) * 2.0; |
77 | | - s = s + textureSample(original, original_sampler, uv - d.zy * scale); |
78 | | - |
79 | | - s = s + textureSample(original, original_sampler, uv + d.zw * scale) * 2.0; |
80 | | - s = s + textureSample(original, original_sampler, uv) * 4.0; |
81 | | - s = s + textureSample(original, original_sampler, uv + d.xw * scale) * 2.0; |
| 44 | +fn rgb_to_srgb_simple(color: vec3<f32>) -> vec3<f32> { |
| 45 | + return pow(color, vec3<f32>(1.0 / 2.2)); |
| 46 | +} |
82 | 47 |
|
83 | | - s = s + textureSample(original, original_sampler, uv + d.zy * scale); |
84 | | - s = s + textureSample(original, original_sampler, uv + d.wy * scale) * 2.0; |
85 | | - s = s + textureSample(original, original_sampler, uv + d.xy * scale); |
| 48 | +// http://graphicrants.blogspot.com/2013/12/tone-mapping.html |
| 49 | +fn karis_average(color: vec3<f32>) -> f32 { |
| 50 | + // Luma is computed by gamma-encoding linear RGB to approximate non-linear sRGB with pow(color, 1.0 / 2.2), |
| 51 | + // weighting the result with the Rec. 709 luminance coefficients, and dividing by 4.0. |
| 52 | + let luma = tonemapping_luminance(rgb_to_srgb_simple(color)) / 4.0; |
| 53 | + return 1.0 / (1.0 + luma); |
| 54 | +} |
86 | 55 |
|
87 | | - return s / 16.0; |
| 56 | +// [COD] slide 153 |
| 57 | +fn sample_input_13_tap(uv: vec2<f32>) -> vec3<f32> { |
| 58 | + let a = textureSample(input_texture, s, uv, vec2<i32>(-2, 2)).rgb; |
| 59 | + let b = textureSample(input_texture, s, uv, vec2<i32>(0, 2)).rgb; |
| 60 | + let c = textureSample(input_texture, s, uv, vec2<i32>(2, 2)).rgb; |
| 61 | + let d = textureSample(input_texture, s, uv, vec2<i32>(-2, 0)).rgb; |
| 62 | + let e = textureSample(input_texture, s, uv).rgb; |
| 63 | + let f = textureSample(input_texture, s, uv, vec2<i32>(2, 0)).rgb; |
| 64 | + let g = textureSample(input_texture, s, uv, vec2<i32>(-2, -2)).rgb; |
| 65 | + let h = textureSample(input_texture, s, uv, vec2<i32>(0, -2)).rgb; |
| 66 | + let i = textureSample(input_texture, s, uv, vec2<i32>(2, -2)).rgb; |
| 67 | + let j = textureSample(input_texture, s, uv, vec2<i32>(-1, 1)).rgb; |
| 68 | + let k = textureSample(input_texture, s, uv, vec2<i32>(1, 1)).rgb; |
| 69 | + let l = textureSample(input_texture, s, uv, vec2<i32>(-1, -1)).rgb; |
| 70 | + let m = textureSample(input_texture, s, uv, vec2<i32>(1, -1)).rgb; |
| 71 | + |
| 72 | +#ifdef FIRST_DOWNSAMPLE |
| 73 | + // [COD] slide 168 |
| 74 | + // |
| 75 | + // The first downsample pass reads from the rendered frame which may exhibit |
| 76 | + // 'fireflies' (individual very bright pixels) that should not cause the bloom effect. |
| 77 | + // |
| 78 | + // The first downsample uses a firefly-reduction method proposed by Brian Karis |
| 79 | + // which takes a weighted-average of the samples to limit their luma range to [0, 1]. |
| 80 | + // This implementation matches the LearnOpenGL article [PBB]. |
| 81 | + var group0 = (a + b + d + e) * (0.125f / 4.0f); |
| 82 | + var group1 = (b + c + e + f) * (0.125f / 4.0f); |
| 83 | + var group2 = (d + e + g + h) * (0.125f / 4.0f); |
| 84 | + var group3 = (e + f + h + i) * (0.125f / 4.0f); |
| 85 | + var group4 = (j + k + l + m) * (0.5f / 4.0f); |
| 86 | + group0 *= karis_average(group0); |
| 87 | + group1 *= karis_average(group1); |
| 88 | + group2 *= karis_average(group2); |
| 89 | + group3 *= karis_average(group3); |
| 90 | + group4 *= karis_average(group4); |
| 91 | + return group0 + group1 + group2 + group3 + group4; |
| 92 | +#else |
| 93 | + var sample = (a + c + g + i) * 0.03125; |
| 94 | + sample += (b + d + f + h) * 0.0625; |
| 95 | + sample += (e + j + k + l + m) * 0.125; |
| 96 | + return sample; |
| 97 | +#endif |
88 | 98 | } |
89 | 99 |
|
90 | | -@fragment |
91 | | -fn downsample_prefilter(@location(0) output_uv: vec2<f32>) -> @location(0) vec4<f32> { |
92 | | - let sample_uv = uniforms.viewport.xy + output_uv * uniforms.viewport.zw; |
93 | | - let texel_size = 1.0 / vec2<f32>(textureDimensions(original)); |
| 100 | +// [COD] slide 162 |
| 101 | +fn sample_input_3x3_tent(uv: vec2<f32>) -> vec3<f32> { |
| 102 | + // Filter radius in UV units. Chosen empirically, starting from the value used in the LearnOpenGL article [PBB] and tweaked. |
| 103 | + let x = 0.004 / uniforms.aspect; |
| 104 | + let y = 0.004; |
94 | 105 |
|
95 | | - let scale = texel_size; |
| 106 | + let a = textureSample(input_texture, s, vec2<f32>(uv.x - x, uv.y + y)).rgb; |
| 107 | + let b = textureSample(input_texture, s, vec2<f32>(uv.x, uv.y + y)).rgb; |
| 108 | + let c = textureSample(input_texture, s, vec2<f32>(uv.x + x, uv.y + y)).rgb; |
96 | 109 |
|
97 | | - let curve = vec3<f32>( |
98 | | - uniforms.threshold - uniforms.knee, |
99 | | - uniforms.knee * 2.0, |
100 | | - 0.25 / uniforms.knee, |
101 | | - ); |
| 110 | + let d = textureSample(input_texture, s, vec2<f32>(uv.x - x, uv.y)).rgb; |
| 111 | + let e = textureSample(input_texture, s, vec2<f32>(uv.x, uv.y)).rgb; |
| 112 | + let f = textureSample(input_texture, s, vec2<f32>(uv.x + x, uv.y)).rgb; |
102 | 113 |
|
103 | | - var o: vec4<f32> = sample_13_tap(sample_uv, scale); |
| 114 | + let g = textureSample(input_texture, s, vec2<f32>(uv.x - x, uv.y - y)).rgb; |
| 115 | + let h = textureSample(input_texture, s, vec2<f32>(uv.x, uv.y - y)).rgb; |
| 116 | + let i = textureSample(input_texture, s, vec2<f32>(uv.x + x, uv.y - y)).rgb; |
104 | 117 |
|
105 | | - o = quadratic_threshold(o, uniforms.threshold, curve); |
106 | | - o = max(o, vec4<f32>(0.00001)); |
| 118 | + var sample = e * 0.25; |
| 119 | + sample += (b + d + f + h) * 0.125; |
| 120 | + sample += (a + c + g + i) * 0.0625; |
107 | 121 |
|
108 | | - return o; |
| 122 | + return sample; |
109 | 123 | } |
110 | 124 |
|
| 125 | +#ifdef FIRST_DOWNSAMPLE |
111 | 126 | @fragment |
112 | | -fn downsample(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> { |
113 | | - let texel_size = 1.0 / vec2<f32>(textureDimensions(original)); |
| 127 | +fn downsample_first(@location(0) output_uv: vec2<f32>) -> @location(0) vec4<f32> { |
| 128 | + let sample_uv = uniforms.viewport.xy + output_uv * uniforms.viewport.zw; |
| 129 | + var sample = sample_input_13_tap(sample_uv); |
| 130 | + // The lower bound of 0.0001 avoids propagating a multiplication by 0.0 through the |
| 131 | + // downsampling and upsampling passes, which would result in black boxes. |
| 132 | + // The upper bound (the largest finite f32 value) is to prevent NaNs. |
| 133 | + sample = clamp(sample, vec3<f32>(0.0001), vec3<f32>(3.40282347E+38)); |
114 | 134 |
|
115 | | - let scale = texel_size; |
| 135 | +#ifdef USE_THRESHOLD |
| 136 | + sample = soft_threshold(sample); |
| 137 | +#endif |
116 | 138 |
|
117 | | - return sample_13_tap(uv, scale); |
| 139 | + return vec4<f32>(sample, 1.0); |
118 | 140 | } |
| 141 | +#endif |
119 | 142 |
|
120 | 143 | @fragment |
121 | | -fn upsample(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> { |
122 | | - let texel_size = 1.0 / vec2<f32>(textureDimensions(original)); |
123 | | - |
124 | | - let upsample = sample_original_3x3_tent(uv, texel_size * uniforms.scale); |
125 | | - var color: vec4<f32> = textureSample(up, original_sampler, uv); |
126 | | - color = vec4<f32>(color.rgb + upsample.rgb, upsample.a); |
127 | | - |
128 | | - return color; |
| 144 | +fn downsample(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> { |
| 145 | + return vec4<f32>(sample_input_13_tap(uv), 1.0); |
129 | 146 | } |
130 | 147 |
|
131 | 148 | @fragment |
132 | | -fn upsample_final(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> { |
133 | | - let texel_size = 1.0 / vec2<f32>(textureDimensions(original)); |
134 | | - |
135 | | - let upsample = sample_original_3x3_tent(uv, texel_size * uniforms.scale); |
136 | | - |
137 | | - return vec4<f32>(upsample.rgb * uniforms.intensity, upsample.a); |
| 149 | +fn upsample(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> { |
| 150 | + return vec4<f32>(sample_input_3x3_tent(uv), 1.0); |
138 | 151 | } |
0 commit comments