Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
424 changes: 212 additions & 212 deletions .gitea/workflows/ci.yml

Large diffs are not rendered by default.

604 changes: 302 additions & 302 deletions .gitea/workflows/release.yml

Large diffs are not rendered by default.

424 changes: 212 additions & 212 deletions .github/workflows/ci.yml

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions internal/CoreRef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3212,8 +3212,14 @@ void Ray::Ref::SampleLightSource(const fvec4 &P, const fvec4 &T, const fvec4 &B,

const fvec4 center = make_fvec3(l.sph.pos);
const fvec4 surface_to_center = center - P;
const float d = length(surface_to_center);

const float temp = sqrtf(d * d - l.sph.radius * l.sph.radius);
const float disk_radius = (temp * l.sph.radius) / d;
const float k = l.sph.radius > 0.0f ? ((temp * disk_radius) / (l.sph.radius * d)) : 1.0f;

float disk_dist;
const fvec4 sampled_dir = normalize_len(map_to_cone(r1, r2, surface_to_center, l.sph.radius), disk_dist);
const fvec4 sampled_dir = normalize_len(map_to_cone(r1, r2, k * surface_to_center, disk_radius), disk_dist);

if (l.sph.radius > 0.0f) {
const float ls_dist = sphere_intersection(center, l.sph.radius, P, sampled_dir);
Expand All @@ -3222,7 +3228,7 @@ void Ray::Ref::SampleLightSource(const fvec4 &P, const fvec4 &T, const fvec4 &B,
const fvec4 light_forward = normalize(light_surf_pos - center);

ls.lp = offset_ray(light_surf_pos, light_forward);
ls.pdf = (disk_dist * disk_dist) / (PI * l.sph.radius * l.sph.radius);
ls.pdf = (disk_dist * disk_dist) / (PI * disk_radius * disk_radius);
} else {
ls.lp = center;
ls.pdf = (disk_dist * disk_dist) / PI;
Expand Down
31 changes: 21 additions & 10 deletions internal/CoreSIMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -2430,7 +2430,7 @@ template <int S> void create_tbn(const fvec<S> N[3], fvec<S> out_T[3], fvec<S> o
}

template <int S>
void map_to_cone(const fvec<S> &r1, const fvec<S> &r2, const fvec<S> N[3], float radius, fvec<S> out_V[3]) {
void map_to_cone(const fvec<S> &r1, const fvec<S> &r2, const fvec<S> N[3], const fvec<S> &radius, fvec<S> out_V[3]) {
const fvec<S> offset[2] = {2.0f * r1 - 1.0f, 2.0f * r2 - 1.0f};

UNROLLED_FOR(i, 3, { out_V[i] = N[i]; })
Expand Down Expand Up @@ -2460,7 +2460,7 @@ force_inline fvec<S> sphere_intersection(const float center[3], const float radi
const fvec<S> b = 2 * dot3(oc, rd);
const fvec<S> c = dot3(oc, oc) - radius * radius;
const fvec<S> discriminant = b * b - 4 * a * c;
return (-b - sqrt(max(discriminant, 0.0f))) / (2 * a);
return safe_div_pos(-b - safe_sqrt(discriminant), 2 * a);
}

template <int S> force_inline fvec<S> schlick_weight(const fvec<S> &u) {
Expand Down Expand Up @@ -5521,9 +5521,16 @@ void Ray::NS::SampleLightSource(const fvec<S> P[3], const fvec<S> T[3], const fv
const fvec<S> r1 = rand_light_uv[0], r2 = rand_light_uv[1];

const float *center = l.sph.pos;
const fvec<S> surface_to_center[3] = {center[0] - P[0], center[1] - P[1], center[2] - P[2]};
fvec<S> surface_to_center[3] = {center[0] - P[0], center[1] - P[1], center[2] - P[2]};
const fvec<S> d = length(surface_to_center);

const fvec<S> temp = safe_sqrt(d * d - l.sph.radius * l.sph.radius);
const fvec<S> disk_radius = (temp * l.sph.radius) / d;
const fvec<S> k = l.sph.radius > 0.0f ? ((temp * disk_radius) / (l.sph.radius * d)) : 1.0f;
UNROLLED_FOR(i, 3, { surface_to_center[i] *= k; })

fvec<S> sampled_dir[3];
map_to_cone(r1, r2, surface_to_center, l.sph.radius, sampled_dir);
map_to_cone(r1, r2, surface_to_center, disk_radius, sampled_dir);
const fvec<S> disk_dist = normalize(sampled_dir);

if (l.sph.radius > 0.0f) {
Expand All @@ -5539,7 +5546,7 @@ void Ray::NS::SampleLightSource(const fvec<S> P[3], const fvec<S> T[3], const fv
offset_ray(light_surf_pos, light_forward, lp_biased);

UNROLLED_FOR(i, 3, { where(ray_queue[index], ls.lp[i]) = lp_biased[i]; })
where(ray_queue[index], ls.pdf) = safe_div_pos(disk_dist * disk_dist, PI * l.sph.radius * l.sph.radius);
where(ray_queue[index], ls.pdf) = safe_div_pos(disk_dist * disk_dist, PI * disk_radius * disk_radius);
} else {
UNROLLED_FOR(i, 3, { where(ray_queue[index], ls.lp[i]) = center[i]; })
where(ray_queue[index], ls.pdf) = (disk_dist * disk_dist) / PI;
Expand Down Expand Up @@ -5577,7 +5584,7 @@ void Ray::NS::SampleLightSource(const fvec<S> P[3], const fvec<S> T[3], const fv
const float radius = tanf(l.dir.angle);

fvec<S> V[3];
map_to_cone(rand_light_uv[0], rand_light_uv[1], ls.L, radius, V);
map_to_cone(rand_light_uv[0], rand_light_uv[1], ls.L, fvec<S>{radius}, V);
safe_normalize(V);

UNROLLED_FOR(i, 3, { where(ray_queue[index], ls.L[i]) = V[i]; })
Expand Down Expand Up @@ -6559,11 +6566,15 @@ void Ray::NS::Evaluate_LightColor(const fvec<S> P[3], const ray_data_t<S> &ray,
}

if (l.type == LIGHT_TYPE_SPHERE) {
fvec<S> disk_normal[3] = {ray.o[0] - l.sph.pos[0], ray.o[1] - l.sph.pos[1], ray.o[2] - l.sph.pos[2]};
normalize(disk_normal);
const fvec<S> disk_dist = dot3(ray.o, disk_normal) - dot3(l.sph.pos, disk_normal);
const float *light_pos = l.sph.pos;
fvec<S> surface_to_center[3] = {light_pos[0] - ray.o[0], light_pos[1] - ray.o[1], light_pos[2] - ray.o[2]};
const fvec<S> d = length(surface_to_center);

const fvec<S> temp = safe_sqrt(d * d - l.sph.radius * l.sph.radius);
const fvec<S> disk_radius = (temp * l.sph.radius) / d;
const fvec<S> disk_dist = (temp * disk_radius) / l.sph.radius;

const fvec<S> light_pdf = safe_div(disk_dist * disk_dist, PI * l.sph.radius * l.sph.radius * pdf_factor);
const fvec<S> light_pdf = safe_div(disk_dist * disk_dist, PI * disk_radius * disk_radius * pdf_factor);
const fvec<S> bsdf_pdf = ray.pdf;

const fvec<S> mis_weight = power_heuristic(bsdf_pdf, light_pdf);
Expand Down
20 changes: 5 additions & 15 deletions internal/Dx/MemoryAllocatorDX.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class MemAllocators {
uint32_t initial_pool_size_;
float growth_factor_;
uint32_t max_pool_size_;
SmallVector<std::unique_ptr<MemAllocator>, 4> allocators_;
std::unique_ptr<MemAllocator> allocators_[8];

public:
MemAllocators(const char *name, Context *ctx, const uint32_t initial_pool_size, const float growth_factor,
Expand All @@ -102,23 +102,13 @@ class MemAllocators {
max_pool_size_(max_pool_size) {}

MemAllocation Allocate(const uint32_t alignment, const uint32_t size, const D3D12_HEAP_TYPE heap_type) {
int alloc_index = -1;
for (int i = 0; i < int(allocators_.size()); ++i) {
if (allocators_[i]->heap_type() == heap_type) {
alloc_index = i;
break;
}
}

if (alloc_index == -1) {
if (!allocators_[heap_type]) {
std::string name = name_;
name += " (type " + std::to_string(heap_type) + ")";
alloc_index = int(allocators_.size());
allocators_.emplace_back(std::make_unique<MemAllocator>(name.c_str(), ctx_, initial_pool_size_, heap_type,
growth_factor_, max_pool_size_));
allocators_[heap_type] = std::make_unique<MemAllocator>(name.c_str(), ctx_, initial_pool_size_, heap_type,
growth_factor_, max_pool_size_);
}

return allocators_[alloc_index]->Allocate(alignment, size);
return allocators_[heap_type]->Allocate(alignment, size);
}
};
} // namespace Dx
Expand Down
10 changes: 7 additions & 3 deletions internal/ShadeRef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1077,10 +1077,14 @@ Ray::Ref::fvec4 Ray::Ref::Evaluate_LightColor(const ray_data_t &ray, const hit_d
if (l.type == LIGHT_TYPE_SPHERE) {
const fvec4 light_pos = make_fvec3(l.sph.pos);

const fvec4 disk_normal = normalize(ro - light_pos);
const float disk_dist = dot(ro, disk_normal) - dot(light_pos, disk_normal);
const fvec4 surface_to_center = light_pos - ro;
const float d = length(surface_to_center);

const float light_pdf = (disk_dist * disk_dist) / (PI * l.sph.radius * l.sph.radius * pdf_factor);
const float temp = sqrtf(d * d - l.sph.radius * l.sph.radius);
const float disk_radius = (temp * l.sph.radius) / d;
const float disk_dist = (temp * disk_radius) / l.sph.radius;

const float light_pdf = (disk_dist * disk_dist) / (PI * disk_radius * disk_radius * pdf_factor);
const float bsdf_pdf = ray.pdf;

const float mis_weight = power_heuristic(bsdf_pdf, light_pdf);
Expand Down
43 changes: 33 additions & 10 deletions internal/Vk/ContextVK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ bool Ray::Vk::Context::Init(ILog *log, const VulkanDevice &vk_device, const Vulk

CheckVkPhysicalDeviceFeatures(api_, physical_device_, device_properties_, mem_properties_, graphics_family_index_,
raytracing_supported_, ray_query_supported_, fp16_supported_, int64_supported_,
int64_atomics_supported_, coop_matrix_supported_);
int64_atomics_supported_, coop_matrix_supported_, pageable_memory_supported_);

if (!raytracing_supported_) {
// mask out unsupported stage
Expand All @@ -220,7 +220,7 @@ bool Ray::Vk::Context::Init(ILog *log, const VulkanDevice &vk_device, const Vulk

if (!external_ && !InitVkDevice(api_, device_, physical_device_, graphics_family_index_, raytracing_supported_,
ray_query_supported_, fp16_supported_, int64_supported_, int64_atomics_supported_,
coop_matrix_supported_, log)) {
coop_matrix_supported_, pageable_memory_supported_, log)) {
return false;
}

Expand Down Expand Up @@ -420,7 +420,7 @@ bool Ray::Vk::Context::InitVkInstance(const Api &api, VkInstance &instance, cons
#if defined(VK_USE_PLATFORM_MACOS_MVK)
instance_info.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR;
#endif

static const VkValidationFeatureEnableEXT enabled_validation_features[] = {
VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
Expand Down Expand Up @@ -543,11 +543,14 @@ bool Ray::Vk::Context::ChooseVkPhysicalDevice(const Api &api, VkPhysicalDevice &
return true;
}

void Ray::Vk::Context::CheckVkPhysicalDeviceFeatures(
const Api &api, VkPhysicalDevice &physical_device, VkPhysicalDeviceProperties &out_device_properties,
VkPhysicalDeviceMemoryProperties &out_mem_properties, uint32_t &out_graphics_family_index,
bool &out_raytracing_supported, bool &out_ray_query_supported, bool &out_shader_fp16_supported,
bool &out_shader_int64_supported, bool &out_int64_atomics_supported, bool &out_coop_matrix_supported) {
void Ray::Vk::Context::CheckVkPhysicalDeviceFeatures(const Api &api, VkPhysicalDevice &physical_device,
VkPhysicalDeviceProperties &out_device_properties,
VkPhysicalDeviceMemoryProperties &out_mem_properties,
uint32_t &out_graphics_family_index,
bool &out_raytracing_supported, bool &out_ray_query_supported,
bool &out_shader_fp16_supported, bool &out_shader_int64_supported,
bool &out_int64_atomics_supported, bool &out_coop_matrix_supported,
bool &out_pageable_memory_supported) {
api.vkGetPhysicalDeviceProperties(physical_device, &out_device_properties);
api.vkGetPhysicalDeviceMemoryProperties(physical_device, &out_mem_properties);

Expand All @@ -572,7 +575,8 @@ void Ray::Vk::Context::CheckVkPhysicalDeviceFeatures(

bool acc_struct_supported = false, raytracing_supported = false, ray_query_supported = false,
shader_fp16_supported = false, shader_int64_supported = false, storage_fp16_supported = false,
coop_matrix_supported = false, shader_buf_int64_atomics_supported = false;
coop_matrix_supported = false, shader_buf_int64_atomics_supported = false, memory_priority_supported = false,
pageable_memory_supported = false;

{ // check for features support
uint32_t extension_count;
Expand All @@ -598,6 +602,10 @@ void Ray::Vk::Context::CheckVkPhysicalDeviceFeatures(
coop_matrix_supported = true;
} else if (strcmp(ext.extensionName, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME) == 0) {
shader_buf_int64_atomics_supported = true;
} else if (strcmp(ext.extensionName, VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME) == 0) {
memory_priority_supported = true;
} else if (strcmp(ext.extensionName, VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_EXTENSION_NAME) == 0) {
pageable_memory_supported = true;
}
}

Expand Down Expand Up @@ -662,12 +670,13 @@ void Ray::Vk::Context::CheckVkPhysicalDeviceFeatures(
out_shader_int64_supported = shader_int64_supported;
out_int64_atomics_supported = shader_buf_int64_atomics_supported;
out_coop_matrix_supported = coop_matrix_supported;
out_pageable_memory_supported = (memory_priority_supported && pageable_memory_supported);
}

bool Ray::Vk::Context::InitVkDevice(const Api &api, VkDevice &device, VkPhysicalDevice physical_device,
uint32_t graphics_family_index, bool enable_raytracing, bool enable_ray_query,
bool enable_fp16, bool enable_int64, bool enable_int64_atomics,
bool enable_coop_matrix, ILog *log) {
bool enable_coop_matrix, bool enable_pageable_memory, ILog *log) {
VkDeviceQueueCreateInfo queue_create_infos[2] = {{}, {}};
const float queue_priorities[] = {1.0f};

Expand Down Expand Up @@ -719,6 +728,11 @@ bool Ray::Vk::Context::InitVkDevice(const Api &api, VkDevice &device, VkPhysical
device_extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
}

if (enable_pageable_memory) {
device_extensions.push_back(VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME);
device_extensions.push_back(VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_EXTENSION_NAME);
}

device_info.enabledExtensionCount = device_extensions.size();
device_info.ppEnabledExtensionNames = device_extensions.cdata();

Expand Down Expand Up @@ -813,6 +827,15 @@ bool Ray::Vk::Context::InitVkDevice(const Api &api, VkDevice &device, VkPhysical
pp_next = &atomic_int64_features.pNext;
}

VkPhysicalDevicePageableDeviceLocalMemoryFeaturesEXT pageable_mem_features = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT};
pageable_mem_features.pageableDeviceLocalMemory = VK_TRUE;

if (enable_pageable_memory) {
(*pp_next) = &pageable_mem_features;
pp_next = &pageable_mem_features.pNext;
}

#if defined(VK_USE_PLATFORM_MACOS_MVK)
VkPhysicalDevicePortabilitySubsetFeaturesKHR subset_features = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_FEATURES_KHR};
Expand Down
6 changes: 4 additions & 2 deletions internal/Vk/ContextVK.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class Context {

bool coop_matrix_supported_ = false;

bool pageable_memory_supported_ = false;

uint32_t supported_stages_mask_ = 0xffffffff;

VkQueue graphics_queue_ = {};
Expand Down Expand Up @@ -154,11 +156,11 @@ class Context {
uint32_t &graphics_family_index, bool &out_raytracing_supported,
bool &out_ray_query_supported, bool &out_shader_fp16_supported,
bool &out_shader_int64_supported, bool &out_int64_atomics_supported,
bool &out_coop_matrix_supported);
bool &out_coop_matrix_supported, bool &out_pageable_memory_supported);
static bool InitVkDevice(const Api &api, VkDevice &device, VkPhysicalDevice physical_device,
uint32_t graphics_family_index, bool enable_raytracing, bool enable_ray_query,
bool enable_fp16, bool enable_int64, bool enable_int64_atomics, bool enable_coop_matrix,
ILog *log);
bool enable_pageable_memory, ILog *log);
static bool InitCommandBuffers(const Api &api, VkCommandPool &command_pool, VkCommandPool &temp_command_pool,
VkCommandBuffer draw_cmd_bufs[MaxFramesInFlight],
VkSemaphore render_finished_semaphores[MaxFramesInFlight],
Expand Down
17 changes: 4 additions & 13 deletions internal/Vk/MemoryAllocatorVK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,23 +105,14 @@ Ray::Vk::MemAllocation Ray::Vk::MemAllocators::Allocate(const uint32_t alignment
return {};
}

int alloc_index = -1;
for (int i = 0; i < int(allocators_.size()); ++i) {
if (allocators_[i]->mem_type_index() == mem_type_index) {
alloc_index = i;
break;
}
}

if (alloc_index == -1) {
if (!allocators_[mem_type_index]) {
std::string name = name_;
name += " (type " + std::to_string(mem_type_index) + ")";
alloc_index = int(allocators_.size());
allocators_.emplace_back(std::make_unique<MemAllocator>(name.c_str(), ctx_, initial_pool_size_, mem_type_index,
growth_factor_, max_pool_size_));
allocators_[mem_type_index] = std::make_unique<MemAllocator>(name.c_str(), ctx_, initial_pool_size_,
mem_type_index, growth_factor_, max_pool_size_);
}

return allocators_[alloc_index]->Allocate(alignment, size);
return allocators_[mem_type_index]->Allocate(alignment, size);
}

Ray::Vk::MemAllocation Ray::Vk::MemAllocators::Allocate(const VkMemoryRequirements &mem_req,
Expand Down
2 changes: 1 addition & 1 deletion internal/Vk/MemoryAllocatorVK.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class MemAllocators {
uint32_t initial_pool_size_;
float growth_factor_;
uint32_t max_pool_size_;
SmallVector<std::unique_ptr<MemAllocator>, 4> allocators_;
std::unique_ptr<MemAllocator> allocators_[32];

public:
MemAllocators(const char *name, Context *ctx, const uint32_t initial_pool_size, const float growth_factor,
Expand Down
6 changes: 3 additions & 3 deletions internal/shaders/output/bake_sky.comp.cso.inl

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/shaders/output/shade_primary_atlas.comp.cso.inl

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/shaders/output/shade_primary_atlas.comp.spv.inl

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/shaders/output/shade_primary_atlas_sky.comp.cso.inl

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/shaders/output/shade_primary_atlas_sky.comp.spv.inl

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/shaders/output/shade_primary_bindless.comp.cso.inl

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/shaders/output/shade_primary_bindless.comp.spv.inl

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/shaders/output/shade_secondary_atlas.comp.cso.inl

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/shaders/output/shade_secondary_atlas.comp.spv.inl

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/shaders/output/shade_secondary_bindless.comp.cso.inl

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/shaders/output/shade_secondary_bindless.comp.spv.inl

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion internal/shaders/output/shade_sky.comp.cso.inl

Large diffs are not rendered by default.

Loading