Skip to content

Commit 667314f

Browse files
committed
Vulkan Sampler Cache
1 parent c46624f commit 667314f

File tree

7 files changed

+201
-12
lines changed

7 files changed

+201
-12
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ noise = { version = "0.8", git = "https://github.com/Razaekel/noise-rs.git", rev
109109
nv-flip = "0.1"
110110
obj = "0.10"
111111
once_cell = "1.20.2"
112+
ordered-float = ">=3,<=4.6"
112113
parking_lot = "0.12.1"
113114
pico-args = { version = "0.5.0", features = [
114115
"eq-separator",

wgpu-hal/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ vulkan = [
5353
"dep:libloading",
5454
"dep:smallvec",
5555
"dep:android_system_properties",
56+
"dep:ordered-float",
5657
]
5758
gles = [
5859
"naga/glsl-out",
@@ -125,6 +126,7 @@ profiling = { workspace = true, default-features = false }
125126
raw-window-handle.workspace = true
126127
thiserror.workspace = true
127128
once_cell.workspace = true
129+
ordered-float = { workspace = true, optional = true }
128130

129131
# backends common
130132
arrayvec.workspace = true

wgpu-hal/src/vulkan/adapter.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1557,6 +1557,10 @@ impl super::Instance {
15571557
#[cfg(windows)]
15581558
external_memory_win32: phd_capabilities
15591559
.supports_extension(khr::external_memory_win32::NAME),
1560+
maximum_samplers: phd_capabilities
1561+
.properties
1562+
.limits
1563+
.max_sampler_allocation_count,
15601564
};
15611565
let capabilities = crate::Capabilities {
15621566
limits: phd_capabilities.to_wgpu_limits(),
@@ -1905,6 +1909,9 @@ impl super::Adapter {
19051909
workarounds: self.workarounds,
19061910
render_passes: Mutex::new(Default::default()),
19071911
framebuffers: Mutex::new(Default::default()),
1912+
sampler_cache: Mutex::new(super::sampler::SamplerCache::new(
1913+
self.private_caps.maximum_samplers,
1914+
)),
19081915
memory_allocations_counter: Default::default(),
19091916
});
19101917

wgpu-hal/src/vulkan/device.rs

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,7 +1296,7 @@ impl crate::Device for super::Device {
12961296
&self,
12971297
desc: &crate::SamplerDescriptor,
12981298
) -> Result<super::Sampler, crate::DeviceError> {
1299-
let mut vk_info = vk::SamplerCreateInfo::default()
1299+
let mut create_info = vk::SamplerCreateInfo::default()
13001300
.flags(vk::SamplerCreateFlags::empty())
13011301
.mag_filter(conv::map_filter_mode(desc.mag_filter))
13021302
.min_filter(conv::map_filter_mode(desc.min_filter))
@@ -1308,40 +1308,44 @@ impl crate::Device for super::Device {
13081308
.max_lod(desc.lod_clamp.end);
13091309

13101310
if let Some(fun) = desc.compare {
1311-
vk_info = vk_info
1311+
create_info = create_info
13121312
.compare_enable(true)
13131313
.compare_op(conv::map_comparison(fun));
13141314
}
13151315

13161316
if desc.anisotropy_clamp != 1 {
13171317
// We only enable anisotropy if it is supported, and wgpu-hal interface guarantees
13181318
// the clamp is in the range [1, 16] which is always supported if anisotropy is.
1319-
vk_info = vk_info
1319+
create_info = create_info
13201320
.anisotropy_enable(true)
13211321
.max_anisotropy(desc.anisotropy_clamp as f32);
13221322
}
13231323

13241324
if let Some(color) = desc.border_color {
1325-
vk_info = vk_info.border_color(conv::map_border_color(color));
1325+
create_info = create_info.border_color(conv::map_border_color(color));
13261326
}
13271327

1328-
let raw = unsafe {
1329-
self.shared
1330-
.raw
1331-
.create_sampler(&vk_info, None)
1332-
.map_err(super::map_host_device_oom_and_ioca_err)?
1333-
};
1328+
let raw = self
1329+
.shared
1330+
.sampler_cache
1331+
.lock()
1332+
.create_sampler(&self.shared.raw, create_info)?;
13341333

1334+
// TODO: Cached samplers will just continually overwrite the label
13351335
if let Some(label) = desc.label {
13361336
unsafe { self.shared.set_object_name(raw, label) };
13371337
}
13381338

13391339
self.counters.samplers.add(1);
13401340

1341-
Ok(super::Sampler { raw })
1341+
Ok(super::Sampler { raw, create_info })
13421342
}
13431343
unsafe fn destroy_sampler(&self, sampler: super::Sampler) {
1344-
unsafe { self.shared.raw.destroy_sampler(sampler.raw, None) };
1344+
self.shared.sampler_cache.lock().destroy_sampler(
1345+
&self.shared.raw,
1346+
sampler.create_info,
1347+
sampler.raw,
1348+
);
13451349

13461350
self.counters.samplers.sub(1);
13471351
}

wgpu-hal/src/vulkan/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ mod command;
2929
mod conv;
3030
mod device;
3131
mod instance;
32+
mod sampler;
3233

3334
use std::{
3435
borrow::Borrow,
@@ -534,6 +535,7 @@ struct PrivateCapabilities {
534535
image_format_list: bool,
535536
#[cfg(windows)]
536537
external_memory_win32: bool,
538+
maximum_samplers: u32,
537539
}
538540

539541
bitflags::bitflags!(
@@ -643,6 +645,7 @@ struct DeviceShared {
643645
features: wgt::Features,
644646
render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>,
645647
framebuffers: Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>,
648+
sampler_cache: Mutex<sampler::SamplerCache>,
646649
memory_allocations_counter: InternalCounter,
647650
}
648651

@@ -830,6 +833,7 @@ impl TextureView {
830833
#[derive(Debug)]
831834
pub struct Sampler {
832835
raw: vk::Sampler,
836+
create_info: vk::SamplerCreateInfo<'static>,
833837
}
834838

835839
impl crate::DynSampler for Sampler {}

wgpu-hal/src/vulkan/sampler.rs

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
//! Sampler cache for Vulkan backend.
2+
//!
3+
//! Nearly identical to the DX12 sampler cache, without descriptor heap management.
4+
5+
use std::collections::{hash_map::Entry, HashMap};
6+
7+
use ash::vk;
8+
use ordered_float::OrderedFloat;
9+
10+
/// [`vk::SamplerCreateInfo`] is not hashable, so we wrap it in a newtype that is.
11+
///
12+
/// We use [`OrderedFloat`] to allow for floating point values to be compared and
13+
/// hashed in a defined way.
14+
#[derive(Copy, Clone)]
15+
struct HashableSamplerCreateInfo(vk::SamplerCreateInfo<'static>);
16+
17+
impl PartialEq for HashableSamplerCreateInfo {
18+
fn eq(&self, other: &Self) -> bool {
19+
self.0.flags == other.0.flags
20+
&& self.0.mag_filter == other.0.mag_filter
21+
&& self.0.min_filter == other.0.min_filter
22+
&& self.0.mipmap_mode == other.0.mipmap_mode
23+
&& self.0.address_mode_u == other.0.address_mode_u
24+
&& self.0.address_mode_v == other.0.address_mode_v
25+
&& self.0.address_mode_w == other.0.address_mode_w
26+
&& OrderedFloat(self.0.mip_lod_bias) == OrderedFloat(other.0.mip_lod_bias)
27+
&& self.0.anisotropy_enable == other.0.anisotropy_enable
28+
&& OrderedFloat(self.0.max_anisotropy) == OrderedFloat(other.0.max_anisotropy)
29+
&& self.0.compare_enable == other.0.compare_enable
30+
&& self.0.compare_op == other.0.compare_op
31+
&& OrderedFloat(self.0.min_lod) == OrderedFloat(other.0.min_lod)
32+
&& OrderedFloat(self.0.max_lod) == OrderedFloat(other.0.max_lod)
33+
&& self.0.border_color == other.0.border_color
34+
&& self.0.unnormalized_coordinates == other.0.unnormalized_coordinates
35+
}
36+
}
37+
38+
impl Eq for HashableSamplerCreateInfo {}
39+
40+
impl std::hash::Hash for HashableSamplerCreateInfo {
41+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
42+
self.0.flags.hash(state);
43+
self.0.mag_filter.hash(state);
44+
self.0.min_filter.hash(state);
45+
self.0.mipmap_mode.hash(state);
46+
self.0.address_mode_u.hash(state);
47+
self.0.address_mode_v.hash(state);
48+
self.0.address_mode_w.hash(state);
49+
OrderedFloat(self.0.mip_lod_bias).hash(state);
50+
self.0.anisotropy_enable.hash(state);
51+
OrderedFloat(self.0.max_anisotropy).hash(state);
52+
self.0.compare_enable.hash(state);
53+
self.0.compare_op.hash(state);
54+
OrderedFloat(self.0.min_lod).hash(state);
55+
OrderedFloat(self.0.max_lod).hash(state);
56+
self.0.border_color.hash(state);
57+
self.0.unnormalized_coordinates.hash(state);
58+
}
59+
}
60+
61+
/// Entry in the sampler cache.
62+
struct CacheEntry {
63+
sampler: vk::Sampler,
64+
ref_count: u32,
65+
}
66+
67+
/// Global sampler cache.
68+
///
69+
/// As some devices have a low limit (4000) on the number of unique samplers that can be created,
70+
/// we need to cache samplers to avoid running out if people eagerly create duplicate samplers.
71+
pub(crate) struct SamplerCache {
72+
/// Mapping from the sampler description to sampler and reference count.
73+
samplers: HashMap<HashableSamplerCreateInfo, CacheEntry>,
74+
/// Maximum number of unique samplers that can be created.
75+
total_capacity: u32,
76+
}
77+
78+
impl SamplerCache {
79+
pub fn new(total_capacity: u32) -> Self {
80+
SamplerCache {
81+
samplers: HashMap::new(),
82+
total_capacity,
83+
}
84+
}
85+
86+
/// Create a sampler, or return an existing one if it already exists.
87+
///
88+
/// If the sampler already exists, the reference count is incremented.
89+
///
90+
/// If the sampler does not exist, a new sampler is created and inserted into the cache.
91+
///
92+
/// If the cache is full, an error is returned.
93+
pub fn create_sampler(
94+
&mut self,
95+
device: &ash::Device,
96+
create_info: vk::SamplerCreateInfo<'static>,
97+
) -> Result<vk::Sampler, crate::DeviceError> {
98+
// Get the number of used samplers. Needs to be done before to appease the borrow checker.
99+
let used_samplers = self.samplers.len();
100+
101+
match self.samplers.entry(HashableSamplerCreateInfo(create_info)) {
102+
Entry::Occupied(occupied_entry) => {
103+
// We have found a match, so increment the refcount and return the index.
104+
let value = occupied_entry.into_mut();
105+
value.ref_count += 1;
106+
Ok(value.sampler)
107+
}
108+
Entry::Vacant(vacant_entry) => {
109+
// We need to create a new sampler.
110+
111+
// We need to check if we can create more samplers.
112+
if used_samplers >= self.total_capacity as usize {
113+
log::error!("There is no more room in the global sampler heap for more unique samplers. Your device supports a maximum of {} unique samplers.", self.samplers.len());
114+
return Err(crate::DeviceError::OutOfMemory);
115+
}
116+
117+
// Create the sampler.
118+
let sampler = unsafe { device.create_sampler(&create_info, None) }
119+
.map_err(super::map_host_device_oom_and_ioca_err)?;
120+
121+
// Insert the new sampler into the mapping.
122+
vacant_entry.insert(CacheEntry {
123+
sampler,
124+
ref_count: 1,
125+
});
126+
127+
Ok(sampler)
128+
}
129+
}
130+
}
131+
132+
/// Decrease the reference count of a sampler and destroy it if the reference count reaches 0.
133+
///
134+
/// The provided sampler is checked against the sampler in the cache to ensure there is no clerical error.
135+
pub fn destroy_sampler(
136+
&mut self,
137+
device: &ash::Device,
138+
create_info: vk::SamplerCreateInfo<'static>,
139+
provided_sampler: vk::Sampler,
140+
) {
141+
let Entry::Occupied(mut hash_map_entry) =
142+
self.samplers.entry(HashableSamplerCreateInfo(create_info))
143+
else {
144+
log::error!("Trying to destroy a sampler that does not exist.");
145+
return;
146+
};
147+
let cache_entry = hash_map_entry.get_mut();
148+
149+
assert_eq!(
150+
cache_entry.sampler, provided_sampler,
151+
"Provided sampler does not match the sampler in the cache."
152+
);
153+
154+
cache_entry.ref_count -= 1;
155+
156+
if cache_entry.ref_count == 0 {
157+
unsafe { device.destroy_sampler(cache_entry.sampler, None) };
158+
hash_map_entry.remove();
159+
}
160+
}
161+
}

0 commit comments

Comments
 (0)