Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/api/QueryPool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ void QueryPool::shader_profile_begin(
uint32_t query_idx = write_timestamp(cmd);

ShaderDuration log_entry{
api::utils::safe_downcast<uint32_t>(shader_durations_.size()),
utils::safe_downcast<uint32_t>(shader_durations_.size()),
// Execution Properties
dispatch_id,
kernel_name,
Expand Down
60 changes: 29 additions & 31 deletions backends/vulkan/runtime/api/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ std::vector<int64_t> calculate_strides(
size_t ndim = sizes.size();
std::vector<int64_t> strides(ndim);

const int64_t last_dim_size = texel_strides
? api::utils::div_up_4(sizes.at(last_dim))
: sizes.at(last_dim);
const int64_t last_dim_size =
texel_strides ? utils::div_up_4(sizes.at(last_dim)) : sizes.at(last_dim);

for (int stride_d = ndim - 1; stride_d >= 0; stride_d--) {
strides.at(stride_d) = 1;
Expand All @@ -51,31 +50,30 @@ std::vector<int64_t> calculate_padded_sizes(
}

// Tensor sizes will be unsqueezed up to the next multiple of 4
const int64_t ndim_up4 = api::utils::align_up_4(ndim);
const int64_t ndim_up4 = utils::align_up_4(ndim);
std::vector<int64_t> padded_sizes(ndim_up4);
for (int64_t i = 0; i < ndim_up4; ++i) {
padded_sizes.at(i) = api::utils::val_at(i - ndim_up4, sizes);
padded_sizes.at(i) = utils::val_at(i - ndim_up4, sizes);
}

// Pad the packed dim to the next multiple of 4.
const int64_t dim_offset =
api::to_packed_dim_nchw_offset<int64_t>(memory_layout);
const int64_t padded_dim_size = api::utils::val_at(-dim_offset, sizes);
padded_sizes.at(ndim_up4 - dim_offset) =
api::utils::align_up_4(padded_dim_size);
const int64_t padded_dim_size = utils::val_at(-dim_offset, sizes);
padded_sizes.at(ndim_up4 - dim_offset) = utils::align_up_4(padded_dim_size);

return padded_sizes;
}

api::utils::uvec3 calculate_image_extents(
utils::uvec3 calculate_image_extents(
const std::vector<int64_t>& padded_sizes,
const api::GPUMemoryLayout memory_layout) {
VK_CHECK_COND(padded_sizes.size() == 4);

uint32_t N = api::utils::safe_downcast<uint32_t>(padded_sizes.at(0));
uint32_t C = api::utils::safe_downcast<uint32_t>(padded_sizes.at(1));
uint32_t H = api::utils::safe_downcast<uint32_t>(padded_sizes.at(2));
uint32_t W = api::utils::safe_downcast<uint32_t>(padded_sizes.at(3));
uint32_t N = utils::safe_downcast<uint32_t>(padded_sizes.at(0));
uint32_t C = utils::safe_downcast<uint32_t>(padded_sizes.at(1));
uint32_t H = utils::safe_downcast<uint32_t>(padded_sizes.at(2));
uint32_t W = utils::safe_downcast<uint32_t>(padded_sizes.at(3));

switch (memory_layout) {
case api::kWidthPacked:
Expand Down Expand Up @@ -126,10 +124,10 @@ vTensor::vTensor(
dtype_,
allocate_memory) {
if (storage_type != api::kBuffer) {
texture_limits_.limits = api::utils::ivec3{
api::utils::safe_downcast<int32_t>(storage_.image_extents_.data[0]),
api::utils::safe_downcast<int32_t>(storage_.image_extents_.data[1]),
api::utils::safe_downcast<int32_t>(storage_.image_extents_.data[2])};
texture_limits_.limits = utils::ivec3{
utils::safe_downcast<int32_t>(storage_.image_extents_.data[0]),
utils::safe_downcast<int32_t>(storage_.image_extents_.data[1]),
utils::safe_downcast<int32_t>(storage_.image_extents_.data[2])};
}

if (dtype == api::kHalf) {
Expand Down Expand Up @@ -172,8 +170,8 @@ api::VulkanBuffer& vTensor::buffer(

const api::BufferBindInfo vTensor::sizes_ubo() {
if (!sizes_uniform_.buffer()) {
sizes_uniform_ = api::ParamsBuffer(
storage_.context_, api::utils::make_whcn_ivec4(sizes_));
sizes_uniform_ =
api::ParamsBuffer(storage_.context_, utils::make_whcn_ivec4(sizes_));
}
return api::BufferBindInfo(sizes_uniform_.buffer());
}
Expand All @@ -190,7 +188,7 @@ const api::BufferBindInfo vTensor::texel_strides_ubo() {
if (!texel_strides_uniform_.buffer()) {
texel_strides_uniform_ = api::ParamsBuffer(
storage_.context_,
api::utils::make_whcn_ivec4(
utils::make_whcn_ivec4(
calculate_strides(padded_sizes_, memory_layout_)));
}
return api::BufferBindInfo(texel_strides_uniform_.buffer());
Expand Down Expand Up @@ -243,23 +241,23 @@ void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {

// Calculate the extents of the image texture that would have been required
// for a tensor of the new sizes.
api::utils::uvec3 virtual_extents =
utils::uvec3 virtual_extents =
calculate_image_extents(padded_sizes_, memory_layout_);

// Update the texture limits to reflect the new virtual extents.
texture_limits_.limits = api::utils::ivec3{
api::utils::safe_downcast<int32_t>(virtual_extents.data[0]),
api::utils::safe_downcast<int32_t>(virtual_extents.data[1]),
api::utils::safe_downcast<int32_t>(virtual_extents.data[2])};
texture_limits_.limits = utils::ivec3{
utils::safe_downcast<int32_t>(virtual_extents.data[0]),
utils::safe_downcast<int32_t>(virtual_extents.data[1]),
utils::safe_downcast<int32_t>(virtual_extents.data[2])};

if (sizes_uniform_.buffer()) {
sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_));
sizes_uniform_.update(utils::make_whcn_ivec4(sizes_));
}
if (texture_limits_uniform_.buffer()) {
texture_limits_uniform_.update(texture_limits_);
}
if (texel_strides_uniform_.buffer()) {
texel_strides_uniform_.update(api::utils::make_whcn_ivec4(
texel_strides_uniform_.update(utils::make_whcn_ivec4(
calculate_strides(padded_sizes_, memory_layout_)));
}
if (ntexels_uniform_.buffer()) {
Expand All @@ -279,7 +277,7 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
if (storage_type() != api::kBuffer) {
// For texture storage check that the current texture is large enough for
// the new sizes of the tensor.
api::utils::uvec3 virtual_extents =
utils::uvec3 virtual_extents =
calculate_image_extents(padded_sizes_, memory_layout_);

bool valid_resize = virtual_extents.data[0] <= image_extents().data[0];
Expand All @@ -302,7 +300,7 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {

api::VulkanImage allocate_image(
api::Context* const context_ptr,
api::utils::uvec3& image_extents,
utils::uvec3& image_extents,
const api::StorageType storage_type,
const VkFormat image_format,
const bool allocate_memory) {
Expand Down Expand Up @@ -375,7 +373,7 @@ vTensorStorage::vTensorStorage(
: context_(context),
storage_type_{storage_type},
image_extents_(calculate_image_extents(padded_sizes, gpu_memory_layout)),
buffer_length_{api::utils::multiply_integers(padded_sizes)},
buffer_length_{utils::multiply_integers(padded_sizes)},
image_(allocate_image(
context_,
image_extents_,
Expand Down Expand Up @@ -474,7 +472,7 @@ void vTensorStorage::discard_and_reallocate(
api::to_vkformat(dtype),
image_owns_memory);

buffer_length_ = api::utils::multiply_integers(padded_sizes);
buffer_length_ = utils::multiply_integers(padded_sizes);
buffer_ = allocate_buffer(
context_, buffer_length_, storage_type_, dtype, buffer_owns_memory);
}
Expand Down
16 changes: 8 additions & 8 deletions backends/vulkan/runtime/api/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ std::vector<int64_t> calculate_padded_sizes(
* Given the padded sizes of a tensor and the GPU memory layout, calculate the
* 3D image extents required to store the tensor data as an image texture.
*/
api::utils::uvec3 calculate_image_extents(
utils::uvec3 calculate_image_extents(
const std::vector<int64_t>& padded_sizes,
const api::GPUMemoryLayout memory_layout);

Expand Down Expand Up @@ -102,7 +102,7 @@ class vTensorStorage final {
api::StorageType storage_type_;

// Resource sizings
api::utils::uvec3 image_extents_{};
utils::uvec3 image_extents_{};
int64_t buffer_length_{};

// GPU Storage
Expand Down Expand Up @@ -141,7 +141,7 @@ class vTensor final {
// Alignment is required to conform with Vulkan specification; a 3 or 4
// component vector with components of size N must have base alignment of
// 4N.
alignas(16) api::utils::ivec3 limits;
alignas(16) utils::ivec3 limits;
};

public:
Expand Down Expand Up @@ -231,7 +231,7 @@ class vTensor final {
return storage_.storage_type_ == api::kBuffer;
}

inline const api::utils::uvec3& image_extents() const {
inline const utils::uvec3& image_extents() const {
return storage_.image_extents_;
}

Expand Down Expand Up @@ -291,12 +291,12 @@ class vTensor final {
*/
const api::BufferBindInfo ntexels_ubo();

inline const api::utils::ivec3 texture_limits() const {
inline const utils::ivec3 texture_limits() const {
return texture_limits_.limits;
}

inline size_t numel() const {
return api::utils::multiply_integers(sizes());
return utils::multiply_integers(sizes());
}

inline size_t nbytes() const {
Expand All @@ -307,15 +307,15 @@ class vTensor final {
* Returns numel but based on padded_sizes_ instead of sizes_
*/
inline size_t gpu_numel() const {
return api::utils::multiply_integers(padded_sizes_);
return utils::multiply_integers(padded_sizes_);
}

/*
* Returns the number of texels in the image texture or texel buffer used to
* store the tensor's data.
*/
inline int32_t texel_numel() const {
return api::utils::safe_downcast<int32_t>(gpu_numel() / 4);
return utils::safe_downcast<int32_t>(gpu_numel() / 4);
}

/*
Expand Down
6 changes: 3 additions & 3 deletions backends/vulkan/runtime/api/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
#endif //_MSC_VER

namespace vkcompute {
namespace api {

namespace utils {

//
Expand Down Expand Up @@ -299,7 +297,7 @@ inline detail::vec<T, N> divup_vec(
const detail::vec<T, N>& b) {
detail::vec<T, N> result;
for (uint32_t i = 0; i < N; ++i) {
result.data[i] = api::utils::div_up(a.data[i], b.data[i]);
result.data[i] = utils::div_up(a.data[i], b.data[i]);
}
return result;
}
Expand Down Expand Up @@ -462,6 +460,8 @@ inline int64_t multiply_integers(Iter begin, Iter end) {

} // namespace utils

namespace api {

inline VkExtent3D create_extent3d(const utils::uvec3& extents) {
return VkExtent3D{extents.data[0u], extents.data[1u], extents.data[2u]};
}
Expand Down
10 changes: 5 additions & 5 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ api::GPUMemoryLayout ComputeGraph::suggested_memory_layout(
}
// For 3 dimensional tensors that only have a channels dimension of 1, still
// prefer width packed.
if (api::utils::val_at(-3, sizes) == 1) {
if (utils::val_at(-3, sizes) == 1) {
return api::kWidthPacked;
}
return api::kChannelsPacked;
Expand Down Expand Up @@ -312,14 +312,14 @@ void ComputeGraph::update_descriptor_counts(
}
}

api::utils::uvec3 ComputeGraph::create_global_wg_size(const ValueRef idx) {
utils::uvec3 ComputeGraph::create_global_wg_size(const ValueRef idx) {
if (is_buffer_storage(idx)) {
return {uint32_t(texel_numel_of(idx)), 1u, 1u};
}
return image_extents_of(idx);
}

api::utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) {
utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) {
if (config_.enable_local_wg_size_override) {
return config_.local_wg_size_override;
}
Expand All @@ -328,8 +328,8 @@ api::utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) {
return {64u, 1u, 1u};
}

const api::utils::uvec3 image_extents = image_extents_of(idx);
api::utils::uvec3 local_group_size = {4, 4, 4};
const utils::uvec3 image_extents = image_extents_of(idx);
utils::uvec3 local_group_size = {4, 4, 4};

if (image_extents.data[2u] == 1) {
if (image_extents.data[1u] == 1) {
Expand Down
6 changes: 3 additions & 3 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ class ComputeGraph final {

api::ScalarType dtype_of(const ValueRef idx) const;

inline api::utils::uvec3 image_extents_of(const ValueRef idx) const {
inline utils::uvec3 image_extents_of(const ValueRef idx) const {
return values_.at(idx).toConstTensor().image_extents();
}

Expand Down Expand Up @@ -454,7 +454,7 @@ class ComputeGraph final {
* All other components will be set to 1 (i.e. {ntexels, 1, 1} will be
* returned).
*/
api::utils::uvec3 create_global_wg_size(const ValueRef idx);
utils::uvec3 create_global_wg_size(const ValueRef idx);

/*
* Suggest a local workgroup size for a given `vTensor` value, assuming that
Expand All @@ -466,7 +466,7 @@ class ComputeGraph final {
* Currently, the local workgroup size is hard-coded to contain a total of 64
* shader invocations. In the future, this value can be configured.
*/
api::utils::uvec3 create_local_wg_size(const ValueRef idx);
utils::uvec3 create_local_wg_size(const ValueRef idx);

//
// Input/Output
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/GraphConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ void GraphConfig::set_memory_layout_override(
}

void GraphConfig::set_local_wg_size_override(
const api::utils::uvec3& local_wg_size) {
const utils::uvec3& local_wg_size) {
enable_local_wg_size_override = true;
local_wg_size_override = local_wg_size;
}
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/GraphConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ struct GraphConfig final {
bool enable_querypool;

bool enable_local_wg_size_override;
api::utils::uvec3 local_wg_size_override;
utils::uvec3 local_wg_size_override;

// Generate a default graph config with pre-configured settings
explicit GraphConfig();

void set_storage_type_override(api::StorageType storage_type);
void set_memory_layout_override(api::GPUMemoryLayout memory_layout);
void set_local_wg_size_override(const api::utils::uvec3& local_wg_size);
void set_local_wg_size_override(const utils::uvec3& local_wg_size);
};

} // namespace vkcompute
16 changes: 8 additions & 8 deletions backends/vulkan/runtime/graph/Logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,20 @@ inline std::ostream& operator<<(std::ostream& os, const std::vector<T>& vec) {
return os; // Return the ostream to allow chaining
}

inline std::ostream& operator<<(std::ostream& os, const api::utils::uvec3& v) {
return api::utils::operator<<(os, v);
inline std::ostream& operator<<(std::ostream& os, const utils::uvec3& v) {
return utils::operator<<(os, v);
}

inline std::ostream& operator<<(std::ostream& os, const api::utils::uvec4& v) {
return api::utils::operator<<(os, v);
inline std::ostream& operator<<(std::ostream& os, const utils::uvec4& v) {
return utils::operator<<(os, v);
}

inline std::ostream& operator<<(std::ostream& os, const api::utils::ivec3& v) {
return api::utils::operator<<(os, v);
inline std::ostream& operator<<(std::ostream& os, const utils::ivec3& v) {
return utils::operator<<(os, v);
}

inline std::ostream& operator<<(std::ostream& os, const api::utils::ivec4& v) {
return api::utils::operator<<(os, v);
inline std::ostream& operator<<(std::ostream& os, const utils::ivec4& v) {
return utils::operator<<(os, v);
}

template <typename T>
Expand Down
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ namespace vkcompute {
ExecuteNode::ExecuteNode(
ComputeGraph& graph,
const api::ShaderInfo& shader,
const api::utils::uvec3& global_workgroup_size,
const api::utils::uvec3& local_workgroup_size,
const utils::uvec3& global_workgroup_size,
const utils::uvec3& local_workgroup_size,
const std::vector<ArgGroup>& args,
const api::ParamsBindList& params,
const api::SpecVarList& spec_vars,
Expand Down
Loading