// Branching factor of the tree: each inner node has 2^Dim children.
65 static constexpr TreeIndex::offset_type
const BF =
ipow(std::size_t(2), Dim);
// Position type used to address blocks; a dedicated bit distinguishes
// leaf positions from inner positions (see leaf()/inner() below).
71 using pos_type = Index::pos_type;
// One GPU buffer list is kept per element type in the Ts... pack.
73 static constexpr std::size_t
const NumBuffers =
sizeof...(Ts);
78 [[nodiscard]]
LeafData& leafData() {
return leaf_data_; }
80 [[nodiscard]]
LeafData const& leafData()
const {
return leaf_data_; }
82 [[nodiscard]]
InnerData& innerData() {
return inner_data_; }
84 [[nodiscard]]
InnerData const& innerData()
const {
return inner_data_; }
92 [[nodiscard]]
bool exists(pos_type block)
const
94 return leaf(block) ? leafExists(block) : innerExists(block);
// True if the (untagged) leaf position lies within allocated leaf storage.
// NOTE(review): innerExists() asserts inner(block); the symmetric
// assert(leaf(block)) appears to be on a line not visible in this chunk —
// confirm against the full file.
97 [[nodiscard]]
bool leafExists(pos_type block)
const
100 return leaf_data_.capacity() > removeLeafType(block);
103 [[nodiscard]]
bool innerExists(pos_type block)
const
105 assert(inner(block));
106 return inner_data_.capacity() > removeInnerType(block);
// Initialize WebGPU state end-to-end: instance -> adapter -> device -> queue,
// deriving the required limits from the chosen adapter. Early-outs when a
// device already exists. Several lines of this function (the early-out body
// and the final return) are not visible in this chunk.
109 bool gpuInit(WGPUPowerPreference power_preference = WGPUPowerPreference_HighPerformance,
110 WGPUBackendType backend_type = WGPUBackendType_Undefined)
112 if (
nullptr != device_) {
116 instance_ = compute::createInstance();
// nullptr surface: no presentation compatibility constraint on the adapter.
117 adapter_ = compute::createAdapter(instance_,
nullptr, power_preference, backend_type);
118 auto required_limits = requiredLimits(adapter_);
119 device_ = compute::createDevice(adapter_, &required_limits);
120 queue_ = compute::queue(device_);
// Initialize WebGPU state with caller-supplied required limits and an
// optional surface the adapter must be compatible with. Early-outs when a
// device already exists. The early-out body, the tail of the createAdapter
// call, and the final return are not visible in this chunk.
125 bool gpuInit(WGPULimits
const& required_limits,
126 WGPUSurface compatible_surface =
nullptr,
127 WGPUPowerPreference power_preference = WGPUPowerPreference_HighPerformance,
128 WGPUBackendType backend_type = WGPUBackendType_Undefined)
130 if (
nullptr != device_) {
134 instance_ = compute::createInstance();
135 adapter_ = compute::createAdapter(instance_, compatible_surface, power_preference,
137 device_ = compute::createDevice(adapter_, &required_limits);
138 queue_ = compute::queue(device_);
143 bool gpuInit(WGPUAdapter adapter)
145 assert(
nullptr != adapter);
147 return gpuInit(adapter, requiredLimits(adapter));
// Initialize GPU state from an existing adapter with explicit limits.
// The adapter's reference count is bumped because this object releases it
// in its teardown path. The early-out body, the assignment storing the
// adapter into adapter_, and the final return are not visible in this chunk.
150 bool gpuInit(WGPUAdapter adapter, WGPULimits
const& required_limits)
152 if (
nullptr != device_) {
156 assert(
nullptr != adapter);
159 wgpuAdapterAddRef(adapter);
162 device_ = compute::createDevice(adapter, &required_limits);
163 queue_ = compute::queue(device_);
// Adopt an externally created device: bump its reference count and fetch
// its queue. Early-outs when a device already exists. The early-out body,
// the assignment storing the device into device_, and the final return are
// not visible in this chunk.
168 bool gpuInit(WGPUDevice device)
170 if (
nullptr != device_) {
174 assert(
nullptr != device);
177 wgpuDeviceAddRef(device);
180 queue_ = compute::queue(device);
// GPU teardown body (the enclosing function's signature — presumably a
// destructor or a gpuRelease() — is not visible in this chunk). Releases,
// in order: all leaf buffers, all inner buffers, then queue, device,
// adapter, and instance, guarding each WebGPU release against nullptr.
187 for (
auto& buffers : leaf_buffers_) {
188 for (WGPUBuffer& buf : buffers) {
189 if (
nullptr != buf) {
190 wgpuBufferRelease(buf);
196 for (
auto& buffers : inner_buffers_) {
197 for (WGPUBuffer& buf : buffers) {
198 if (
nullptr != buf) {
199 wgpuBufferRelease(buf);
// Release core WebGPU objects in reverse order of acquisition.
205 if (
nullptr != queue_) {
206 wgpuQueueRelease(queue_);
210 if (
nullptr != device_) {
211 wgpuDeviceRelease(device_);
215 if (
nullptr != adapter_) {
216 wgpuAdapterRelease(adapter_);
220 if (
nullptr != instance_) {
221 wgpuInstanceRelease(instance_);
226 [[nodiscard]] WGPUDevice gpuDevice()
const {
return device_; }
228 [[nodiscard]] WGPUQueue gpuQueue()
const {
return queue_; }
// Total number of GPU buffers (leaf + inner) held for element type T.
// The template header for this member is not visible in this chunk.
231 [[nodiscard]] std::size_t gpuNumBuffers()
const
233 return gpuNumLeafBuffers<T>() + gpuNumInnerBuffers<T>();
// Number of GPU leaf buffers held for element type T (index_v maps T to
// its slot in the Ts... pack). Template header not visible in this chunk.
237 [[nodiscard]] std::size_t gpuNumLeafBuffers()
const
239 return leaf_buffers_[index_v<T, Ts...>].size();
// Number of GPU inner buffers held for element type T.
// Template header not visible in this chunk.
243 [[nodiscard]] std::size_t gpuNumInnerBuffers()
const
245 return inner_buffers_[index_v<T, Ts...>].size();
// The index-th GPU leaf buffer for element type T (no bounds check).
// Template header not visible in this chunk.
249 [[nodiscard]] WGPUBuffer gpuLeafBuffer(std::size_t index)
const
251 return leaf_buffers_[index_v<T, Ts...>][index];
// The index-th GPU inner buffer for element type T (no bounds check).
// Template header not visible in this chunk.
255 [[nodiscard]] WGPUBuffer gpuInnerBuffer(std::size_t index)
const
257 return inner_buffers_[index_v<T, Ts...>][index];
// Size in bytes of the index-th GPU leaf buffer for element type T,
// as reported by WebGPU. Template header not visible in this chunk.
261 [[nodiscard]] std::size_t gpuLeafBufferSize(std::size_t index)
const
263 return wgpuBufferGetSize(gpuLeafBuffer<T>(index));
// Size in bytes of the index-th GPU inner buffer for element type T,
// as reported by WebGPU. Template header not visible in this chunk.
267 [[nodiscard]] std::size_t gpuInnerBufferSize(std::size_t index)
const
269 return wgpuBufferGetSize(gpuInnerBuffer<T>(index));
// Read back the leaf buffers for every element type (comma-fold over Ts...).
278 void gpuReadLeaf() { (gpuReadLeaf<Ts>(), ...); }
// Read back the inner buffers for every element type (comma-fold over Ts...).
280 void gpuReadInner() { (gpuReadInner<Ts>(), ...); }
// Body of a gpuWrite() that uploads both leaf and inner buffers; the
// enclosing signature and the combining return are not visible in this
// chunk. Both calls are made unconditionally (no short-circuit).
303 bool a = gpuWriteLeaf();
304 bool b = gpuWriteInner();
308 bool gpuWriteLeaf() {
return (gpuWriteLeaf<Ts>() | ...); }
310 bool gpuWriteInner() {
return (gpuWriteInner<Ts>() | ...); }
// Body of a per-type gpuWrite<T>(); the template header, signature, and
// combining return are not visible in this chunk. Both calls are made
// unconditionally (no short-circuit).
315 bool a = gpuWriteLeaf<T>();
316 bool b = gpuWriteInner<T>();
// Body of the per-type leaf upload (template header and signature are not
// visible in this chunk). Serializes leaf_data_'s buckets for T's leaf
// block type into one or more GPU storage buffers, splitting at
// max_buffer_size_. New buffers are created mapped and filled via
// wgpuBufferGetMappedRange; existing buffers are updated with
// wgpuQueueWriteBuffer. Requires a prior successful gpuInit().
325 assert(
nullptr != device_);
326 assert(
nullptr != queue_);
328 using Block =
typename T::template LeafBlock<Dim, BF>;
// Total serialized byte size of all leaf blocks of this type.
330 std::size_t
const size = leaf_data_.template serializedSize<Block>();
332 auto& buffers = leaf_buffers_[index_v<T, Ts...>];
// NOTE(review): this release loop over existing buffers and the later
// buffers[i] reuse path both appear; the guarding condition lines are not
// visible in this chunk — confirm against the full file.
335 for (
auto& buf : buffers) {
336 wgpuBufferRelease(buf);
338 bool empty = buffers.empty();
// Bucket granularity: buffers hold a whole number of serialized buckets.
343 constexpr std::size_t
const bucket_size =
344 LeafData::template serializedBucketSize<Block>();
346 std::size_t
const buckets_per_buffer = max_buffer_size_ / bucket_size;
347 std::size_t
const buffer_size = bucket_size * buckets_per_buffer;
// Ceiling division; assumes size > 0 on lines not visible here.
349 std::size_t
const num_buffers = 1 + (size - 1) / buffer_size;
351 buffers.reserve(num_buffers);
353 bool updated =
false;
355 auto it = leaf_data_.template beginBucket<Block>();
356 auto last = leaf_data_.template endBucket<Block>();
358 for (std::size_t i{}; num_buffers > i; ++i) {
// Missing buffer: create it mapped and copy buckets straight in.
359 if (buffers.size() <= i) {
362 auto& buffer = buffers.emplace_back(compute::createBuffer(
363 device_,
"", buffer_size, WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst,
366 assert(
nullptr != buffer);
368 void* buf = wgpuBufferGetMappedRange(buffer, 0, buffer_size);
// NOTE(review): inner loop reuses `i`, shadowing the outer index.
370 for (std::size_t i{}; buckets_per_buffer > i && it != last; ++i, ++it) {
371 auto& [data, modified] = *it;
373 std::memcpy(buf, data.data(), bucket_size);
374 buf =
static_cast<void*
>(
static_cast<unsigned char*
>(buf) + bucket_size);
378 wgpuBufferUnmap(buffer);
// Existing buffer: write buckets through the queue at running offsets.
380 WGPUBuffer& buffer = buffers[i];
382 std::size_t offset = 0;
383 for (std::size_t i{}; buckets_per_buffer > i && it != last; ++i, ++it) {
384 auto& [data, modified] = *it;
// NOTE(review): `modified` is bound but the line using it (presumably a
// skip-unmodified check) is not visible in this chunk.
387 wgpuQueueWriteBuffer(queue_, buffer, offset, data.data(), bucket_size);
390 offset += bucket_size;
// Shrink: release and drop buffers beyond the needed count.
398 for (std::size_t i = num_buffers; buffers.size() > i; ++i) {
400 wgpuBufferRelease(buffers[i]);
402 buffers.resize(num_buffers);
// Body of the per-type inner upload (template header and signature are not
// visible in this chunk). Mirrors the leaf upload above but serializes
// inner_data_'s buckets for T's inner block type into inner_buffers_.
// Requires a prior successful gpuInit().
412 assert(
nullptr != device_);
413 assert(
nullptr != queue_);
415 using Block =
typename T::template InnerBlock<Dim, BF>;
// Total serialized byte size of all inner blocks of this type.
417 std::size_t
const size = inner_data_.template serializedSize<Block>();
419 auto& buffers = inner_buffers_[index_v<T, Ts...>];
// NOTE(review): guarding condition lines around this release loop are not
// visible in this chunk — confirm against the full file.
422 for (
auto& buf : buffers) {
423 wgpuBufferRelease(buf);
425 bool empty = buffers.empty();
// Bucket granularity: buffers hold a whole number of serialized buckets.
430 constexpr std::size_t
const bucket_size =
431 InnerData::template serializedBucketSize<Block>();
433 std::size_t
const buckets_per_buffer = max_buffer_size_ / bucket_size;
434 std::size_t
const buffer_size = bucket_size * buckets_per_buffer;
// Ceiling division; assumes size > 0 on lines not visible here.
436 std::size_t
const num_buffers = 1 + (size - 1) / buffer_size;
438 buffers.reserve(num_buffers);
440 bool updated =
false;
442 auto it = inner_data_.template beginBucket<Block>();
443 auto last = inner_data_.template endBucket<Block>();
445 for (std::size_t i{}; num_buffers > i; ++i) {
// Missing buffer: create it mapped and copy buckets straight in.
446 if (buffers.size() <= i) {
449 auto& buffer = buffers.emplace_back(compute::createBuffer(
450 device_,
"", buffer_size, WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst,
453 assert(
nullptr != buffer);
455 void* buf = wgpuBufferGetMappedRange(buffer, 0, buffer_size);
// NOTE(review): inner loop reuses `i`, shadowing the outer index.
457 for (std::size_t i{}; buckets_per_buffer > i && it != last; ++i, ++it) {
458 auto& [data, modified] = *it;
460 std::memcpy(buf, data.data(), bucket_size);
461 buf =
static_cast<void*
>(
static_cast<unsigned char*
>(buf) + bucket_size);
465 wgpuBufferUnmap(buffer);
// Existing buffer: write buckets through the queue at running offsets.
467 WGPUBuffer& buffer = buffers[i];
469 std::size_t offset = 0;
470 for (std::size_t i{}; buckets_per_buffer > i && it != last; ++i, ++it) {
471 auto& [data, modified] = *it;
// NOTE(review): `modified` is bound but the line using it (presumably a
// skip-unmodified check) is not visible in this chunk.
474 wgpuQueueWriteBuffer(queue_, buffer, offset, data.data(), bucket_size);
477 offset += bucket_size;
// Shrink: release and drop buffers beyond the needed count.
485 for (std::size_t i = num_buffers; buffers.size() > i; ++i) {
487 wgpuBufferRelease(buffers[i]);
489 buffers.resize(num_buffers);
495 [[nodiscard]]
static constexpr bool leaf(pos_type block)
noexcept
497 return Index::TYPE_BIT != (Index::TYPE_BIT & block);
// A position denotes an inner block when it is not a leaf position.
// The body line of this function is not visible in this chunk.
500 [[nodiscard]]
static constexpr bool inner(pos_type block)
noexcept
// Tag a raw leaf-storage position as a leaf position.
// The body line of this function is not visible in this chunk.
505 [[nodiscard]]
static constexpr pos_type addLeafType(pos_type block)
noexcept
// Strip the leaf tag, yielding the raw leaf-storage position.
// The body line of this function is not visible in this chunk.
510 [[nodiscard]]
static constexpr pos_type removeLeafType(pos_type block)
noexcept
515 [[nodiscard]]
static constexpr pos_type addInnerType(pos_type block)
noexcept
517 return Index::TYPE_BIT | block;
520 [[nodiscard]]
static constexpr pos_type removeInnerType(pos_type block)
noexcept
522 return ~Index::TYPE_BIT & block;
525 [[nodiscard]] std::size_t size()
const {
return leafSize() + innerSize(); }
527 [[nodiscard]] std::size_t leafSize()
const {
return leaf_data_.size(); }
529 [[nodiscard]] std::size_t innerSize()
const {
return inner_data_.size(); }
531 void reserve(std::size_t cap)
533 leafReserve((cap + 1) / 2);
534 innerReserve(cap / 2);
537 void leafReserve(std::size_t cap) { leaf_data_.reserve(cap); }
539 void innerReserve(std::size_t cap) { inner_data_.reserve(cap); }
547 void leafClear() { leaf_data_.clear(); }
549 void innerClear() { inner_data_.clear(); }
551 [[nodiscard]] pos_type create(
bool leaf) {
return leaf ? leafCreate() : innerCreate(); }
553 [[nodiscard]] pos_type leafCreate() {
return addLeafType(leaf_data_.create()); }
555 [[nodiscard]] pos_type innerCreate() {
return addInnerType(inner_data_.create()); }
557 [[nodiscard]] pos_type createThreadSafe(
bool leaf)
559 return leaf ? leafCreateThreadSafe() : innerCreateThreadSafe();
562 [[nodiscard]] pos_type leafCreateThreadSafe()
564 return addLeafType(leaf_data_.createThreadSafe());
567 [[nodiscard]] pos_type innerCreateThreadSafe()
569 return addInnerType(inner_data_.createThreadSafe());
572 void erase(pos_type block) { leaf(block) ? leafErase(block) : innerErase(block); }
574 void leafErase(pos_type block) { leaf_data_.eraseBlock(removeLeafType(block)); }
576 void innerErase(pos_type block) { inner_data_.eraseBlock(removeInnerType(block)); }
// Mutable access to the T-typed leaf block at the tagged position.
// The template header and closing brace are not visible in this chunk.
579 [[nodiscard]] T& leafBlock(pos_type block)
581 assert(leafExists(block));
582 return leaf_data_.template get<T>(removeLeafType(block));
// Read-only access to the T-typed leaf block at the tagged position.
// The template header — and, by symmetry with the mutable overload, an
// assert line — are not visible in this chunk.
586 [[nodiscard]] T
const& leafBlock(pos_type block)
const
588 return leaf_data_.template get<T>(removeLeafType(block));
// Mutable access to the T-typed inner block at the tagged position.
// The template header and closing brace are not visible in this chunk.
592 [[nodiscard]] T& innerBlock(pos_type block)
594 assert(innerExists(block));
595 return inner_data_.template get<T>(removeInnerType(block));
// Read-only access to the T-typed inner block at the tagged position.
// The template header — and, by symmetry with the mutable overload, an
// assert line — are not visible in this chunk.
599 [[nodiscard]] T
const& innerBlock(pos_type block)
const
601 return inner_data_.template get<T>(removeInnerType(block));
// Build the WGPULimits to request from `adapter`: mirror the adapter's
// alignment limits, clamp max_buffer_size_ to what the adapter supports
// for both buffer size and storage-binding size, then fill in fixed
// compute/uniform limits. The `max_buffer_size_ =` assignment lines
// feeding the two std::min expressions, and the final return, are not
// visible in this chunk.
605 [[nodiscard]] WGPULimits requiredLimits(WGPUAdapter adapter)
607 WGPULimits required = WGPU_LIMITS_INIT;
608 WGPULimits supported = WGPU_LIMITS_INIT;
610 wgpuAdapterGetLimits(adapter, &supported);
// Alignments must match the adapter exactly; requesting smaller fails.
614 required.minUniformBufferOffsetAlignment = supported.minUniformBufferOffsetAlignment;
615 required.minStorageBufferOffsetAlignment = supported.minStorageBufferOffsetAlignment;
// Clamp our buffer-splitting threshold to the adapter's capabilities.
618 std::min(max_buffer_size_,
static_cast<std::size_t
>(supported.maxBufferSize));
620 std::min(max_buffer_size_,
621 static_cast<std::size_t
>(supported.maxStorageBufferBindingSize));
623 required.maxBufferSize = max_buffer_size_;
624 required.maxStorageBufferBindingSize = max_buffer_size_;
// Fixed compute limits requested from the device.
626 required.maxComputeWorkgroupStorageSize = 16352;
627 required.maxComputeInvocationsPerWorkgroup = 256;
628 required.maxComputeWorkgroupSizeX = 256;
629 required.maxComputeWorkgroupSizeY = 256;
630 required.maxComputeWorkgroupSizeZ = 64;
631 required.maxComputeWorkgroupsPerDimension = 65535;
633 required.maxUniformBuffersPerShaderStage = 12;
634 required.maxUniformBufferBindingSize = 65536;
// Inner-block storage (the matching leaf_data_ declaration is on a line
// not visible in this chunk).
641 InnerData inner_data_;
// WebGPU handles; all nullptr until a gpuInit() overload succeeds.
643 WGPUInstance instance_ =
nullptr;
644 WGPUAdapter adapter_ =
nullptr;
645 WGPUDevice device_ =
nullptr;
646 WGPUQueue queue_ =
nullptr;
// Per-element-type GPU buffer lists, indexed via index_v<T, Ts...>.
647 std::array<std::vector<WGPUBuffer>, NumBuffers> leaf_buffers_{};
648 std::array<std::vector<WGPUBuffer>, NumBuffers> inner_buffers_{};
// Buffer-splitting threshold; default 512 MiB, clamped in requiredLimits().
650 std::size_t max_buffer_size_ = 1'073'741'824 / 2;