Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 83 additions & 5 deletions detection_6d_foundationpose/src/foundationpose_render.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ FoundationPoseRenderer::PrepareBuffer()
// nvdiffrast render 用到的缓存以及渲染器
size_t pose_clip_size = num_vertices_ * (kVertexPoints + 1) * input_poses_num_ * sizeof(float);
size_t pts_cam_size = num_vertices_ * kVertexPoints * input_poses_num_ * sizeof(float);
size_t diffuse_intensity_size = num_vertices_ * input_poses_num_ * sizeof(float);
size_t diffuse_intensity_map_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * sizeof(float);
size_t rast_out_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * (kVertexPoints + 1) * sizeof(float);
size_t color_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof(float);
size_t xyz_map_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof(float);
Expand All @@ -279,6 +281,8 @@ FoundationPoseRenderer::PrepareBuffer()
float* _pose_clip_device;
float* _rast_out_device;
float* _pts_cam_device;
float* _diffuse_intensity_device;
float* _diffuse_intensity_map_device;
float* _texcoords_out_device;
float* _color_device;
float* _xyz_map_device;
Expand Down Expand Up @@ -307,6 +311,14 @@ FoundationPoseRenderer::PrepareBuffer()
"[FoundationPoseRenderer] cudaMalloc `_pts_cam_device` FAILED!!!");
pts_cam_device_ = DeviceBufferUniquePtrType<float>(_pts_cam_device, CudaMemoryDeleter<float>());

CHECK_CUDA(cudaMalloc(&_diffuse_intensity_device, diffuse_intensity_size),
"[FoundationPoseRenderer] cudaMalloc `_diffuse_intensity_device` FAILED!!!");
diffuse_intensity_device_ = DeviceBufferUniquePtrType<float>(_diffuse_intensity_device, CudaMemoryDeleter<float>());

CHECK_CUDA(cudaMalloc(&_diffuse_intensity_map_device, diffuse_intensity_map_size),
"[FoundationPoseRenderer] cudaMalloc `_diffuse_intensity_map_device` FAILED!!!");
diffuse_intensity_map_device_ = DeviceBufferUniquePtrType<float>(_diffuse_intensity_map_device, CudaMemoryDeleter<float>());

CHECK_CUDA(cudaMalloc(&_texcoords_out_device, texcoords_out_size),
"[FoundationPoseRenderer] cudaMalloc `_texcoords_out_device` FAILED!!!");
texcoords_out_device_ = DeviceBufferUniquePtrType<float>(_texcoords_out_device, CudaMemoryDeleter<float>());
Expand Down Expand Up @@ -361,18 +373,24 @@ FoundationPoseRenderer::LoadTexturedMesh()
{
const auto& mesh_model_center = mesh_loader_->GetMeshModelCenter();
const auto& mesh_vertices = mesh_loader_->GetMeshVertices();
const auto& mesh_vertex_normals = mesh_loader_->GetMeshVertexNormals();
const auto& mesh_texcoords = mesh_loader_->GetMeshTextureCoords();
const auto& mesh_faces = mesh_loader_->GetMeshFaces();
const auto& rgb_texture_map = mesh_loader_->GetTextureMap();
mesh_diameter_ = mesh_loader_->GetMeshDiameter();

std::vector<float> vertex_normals;

// Walk through each of the mesh's vertices
for (unsigned int v = 0; v < mesh_vertices.size(); v++) {
vertices_.push_back(mesh_vertices[v].x - mesh_model_center[0]);
vertices_.push_back(mesh_vertices[v].y - mesh_model_center[1]);
vertices_.push_back(mesh_vertices[v].z - mesh_model_center[2]);

vertex_normals.push_back(mesh_vertex_normals[v].x);
vertex_normals.push_back(mesh_vertex_normals[v].y);
vertex_normals.push_back(mesh_vertex_normals[v].z);

// Check if the mesh has texture coordinates
if (mesh_texcoords.size() >= 1) {
texcoords_.push_back(mesh_texcoords[0][v].x);
Expand Down Expand Up @@ -422,6 +440,7 @@ FoundationPoseRenderer::LoadTexturedMesh()
size_t texcoords_size = texcoords_.size() * sizeof(float);

float* _vertices_device;
float* _vertex_normals_device;
float* _texcoords_device;
int32_t* _mesh_faces_device;
uint8_t* _texture_map_device;
Expand All @@ -430,6 +449,10 @@ FoundationPoseRenderer::LoadTexturedMesh()
"[FoundationposeRender] cudaMalloc `mesh_faces_device` FAILED!!!");
vertices_device_ = DeviceBufferUniquePtrType<float>(_vertices_device, CudaMemoryDeleter<float>());

CHECK_CUDA(cudaMalloc(&_vertex_normals_device, vertices_size),
"[FoundationposeRender] cudaMalloc `vertex_normals_device` FAILED!!!");
vertex_normals_device_ = DeviceBufferUniquePtrType<float>(_vertex_normals_device, CudaMemoryDeleter<float>());

CHECK_CUDA(cudaMalloc(&_mesh_faces_device, faces_size),
"[FoundationposeRender] cudaMalloc `mesh_faces_device` FAILED!!!");
mesh_faces_device_ = DeviceBufferUniquePtrType<int32_t>(_mesh_faces_device, CudaMemoryDeleter<int32_t>());
Expand All @@ -442,9 +465,14 @@ FoundationPoseRenderer::LoadTexturedMesh()
"[FoundationposeRender] cudaMalloc `texture_map_device_` FAILED!!!");
texture_map_device_ = DeviceBufferUniquePtrType<uint8_t>(_texture_map_device, CudaMemoryDeleter<uint8_t>());

CHECK_CUDA(cudaMemcpy(vertices_device_.get(),
vertices_.data(),
vertices_size,
CHECK_CUDA(cudaMemcpy(vertices_device_.get(),
vertices_.data(),
vertices_size,
cudaMemcpyHostToDevice),
"[FoundationposeRender] cudaMemcpy mesh_faces_host -> mesh_faces_device FAILED!!!");
CHECK_CUDA(cudaMemcpy(vertex_normals_device_.get(),
vertex_normals.data(),
vertices_size,
cudaMemcpyHostToDevice),
"[FoundationposeRender] cudaMemcpy mesh_faces_host -> mesh_faces_device FAILED!!!");
CHECK_CUDA(cudaMemcpy(mesh_faces_device_.get(),
Expand Down Expand Up @@ -514,6 +542,36 @@ bool FoundationPoseRenderer::TransformVerticesOnCUDA(cudaStream_t stream,
return true;
}

bool FoundationPoseRenderer::TransformVertexNormalsOnCUDA(cudaStream_t stream,
const std::vector<Eigen::MatrixXf>& tfs,
float* output_buffer)
{
// Get the dimensions of the inputs
int tfs_size = tfs.size();
CHECK_STATE(tfs_size != 0,
"[FoundationposeRender] The transfomation matrix is empty! ");

CHECK_STATE(tfs[0].cols() == tfs[0].rows(),
"[FoundationposeRender] The transfomation matrix has different rows and cols! ");

const int total_elements = tfs[0].cols() * tfs[0].rows();

float* transform_device_buffer_ = nullptr;
cudaMallocAsync(&transform_device_buffer_, tfs_size * total_elements * sizeof(float), stream);

for (int i = 0 ; i < tfs_size ; ++ i) {
cudaMemcpyAsync(transform_device_buffer_ + i * total_elements,
tfs[i].data(),
total_elements * sizeof(float),
cudaMemcpyHostToDevice,
stream);
}

foundationpose_render::transform_normals(stream, transform_device_buffer_, tfs_size, vertex_normals_device_.get(), num_vertices_, output_buffer);

cudaFreeAsync(transform_device_buffer_, stream);
return true;
}

bool FoundationPoseRenderer::GeneratePoseClipOnCUDA(cudaStream_t stream,
float* output_buffer,
Expand Down Expand Up @@ -595,15 +653,15 @@ FoundationPoseRenderer::NvdiffrastRender(cudaStream_t cuda_stream,
foundationpose_render::interpolate(
cuda_stream,
pts_cam_device_.get(), rast_out_device_.get(), mesh_faces_device_.get(), xyz_map_device_.get(),
num_vertices_, num_faces_, kVertexPoints,
num_vertices_, num_faces_, 3, kVertexPoints,
H, W, N);
CHECK_CUDA(cudaGetLastError(),
"[FoundationPoseRenderer] interpolate failed!!!");

foundationpose_render::interpolate(
cuda_stream,
texcoords_device_.get(), rast_out_device_.get(), mesh_faces_device_.get(), texcoords_out_device_.get(),
num_vertices_, num_faces_, kTexcoordsDim,
num_vertices_, num_faces_, 2, kTexcoordsDim,
H, W, N);
CHECK_CUDA(cudaGetLastError(),
"[FoundationPoseRenderer] interpolate failed!!!");
Expand All @@ -619,6 +677,26 @@ FoundationPoseRenderer::NvdiffrastRender(cudaStream_t cuda_stream,
CHECK_CUDA(cudaGetLastError(),
"[FoundationPoseRenderer] texture failed!!!");

CHECK_STATE(TransformVertexNormalsOnCUDA(cuda_stream, poses, diffuse_intensity_device_.get()),
"[FoundationPoseRenderer] Transform vertex normals failed!!!");

foundationpose_render::interpolate(cuda_stream,
diffuse_intensity_device_.get(),
rast_out_device_.get(),
mesh_faces_device_.get(),
diffuse_intensity_map_device_.get(),
num_vertices_, num_faces_, 3, 1, H, W, N);
CHECK_CUDA(cudaGetLastError(),
"[FoundationPoseRenderer] interpolate failed!!!");

foundationpose_render::refine_color(cuda_stream, color_device_.get(),
diffuse_intensity_map_device_.get(),
rast_out_device_.get(),
color_device_.get(),
poses.size(), 0.8, 0.5, H, W);
CHECK_CUDA(cudaGetLastError(),
"[FoundationPoseRenderer] refine_color failed!!!");

float min_value = 0.0;
float max_value = 1.0;
foundationpose_render::clamp(cuda_stream, color_device_.get(), min_value, max_value, N * H * W * kNumChannels);
Expand Down
88 changes: 86 additions & 2 deletions detection_6d_foundationpose/src/foundationpose_render.cu
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ void rasterize(

void interpolate(
cudaStream_t stream, float* attr_ptr, float* rast_ptr, int32_t* tri_ptr, float* out, int num_vertices,
int num_triangles, int attr_dim, int H, int W, int C) {
int instance_mode = attr_dim > 2 ? 1 : 0;
int num_triangles, int attr_shape_dim, int attr_dim, int H, int W, int C) {
int instance_mode = attr_shape_dim > 2 ? 1 : 0;

InterpolateKernelParams p = {}; // Initialize all fields to zero.
p.instance_mode = instance_mode;
Expand Down Expand Up @@ -339,4 +339,88 @@ void generate_pose_clip(cudaStream_t stream, const float* transform_matrixs, con
transform_matrixs, bbox2d_matrix, M, points_vectors, N, transformed_points_vectors, rgb_H, rgb_W);
}


__global__ void transform_normals_kernel(
const float* transform_matrixs, int M, const float* normals_vectors,
int N, float* transformed_normal_vectors)
{
int row_idx = threadIdx.y + blockIdx.y * blockDim.y;
int col_idx = threadIdx.x + blockIdx.x * blockDim.x;
if (row_idx >= M || col_idx >= N) return;

const float* matrix = transform_matrixs + row_idx * 16; // 指向当前 4x4 变换矩阵
const float* normal = normals_vectors + col_idx * 3; // 指向当前 normal 向量
float* transformed_normal = transformed_normal_vectors + (row_idx * N + col_idx);

float x = normal[0], y = normal[1], z = normal[2];
// **Column-Major 访问方式**
float tx = matrix[0] * x + matrix[4] * y + matrix[8] * z;
float ty = matrix[1] * x + matrix[5] * y + matrix[9] * z;
float tz = matrix[2] * x + matrix[6] * y + matrix[10] * z;
// 只保留z方向的分量,取反
float l2 = sqrt(tx*tx + ty*ty + tz*tz);
float value = l2 == 0 ? 0 : - tz / l2;
value = clamp_func(value, 0, 1);
transformed_normal[0] = value;
}

void transform_normals(cudaStream_t stream, const float* transform_matrixs, int M, const float* normals_vectors,
int N, float* transformed_normal_vectors)
{
dim3 blockSize = {32, 32};
dim3 gridSize = {ceil_div(N, 32), ceil_div(M, 32)};

transform_normals_kernel<<<gridSize, blockSize, 0, stream>>>(
transform_matrixs, M, normals_vectors, N, transformed_normal_vectors);
}


__global__ void renfine_color_kernel(
const float* color, const float* diffuse_intensity_map, const float* rast_out, float* output, int poses_num, float w_ambient,
float w_diffuse, int rgb_H, int rgb_W)
{
int row_idx = threadIdx.y + blockIdx.y * blockDim.y;
int col_idx = threadIdx.x + blockIdx.x * blockDim.x;
if (row_idx >= rgb_H || col_idx >= rgb_W * poses_num) return;

const int color_idx = col_idx / rgb_W;
const int color_row_idx = row_idx;
const int color_col_idx = col_idx - color_idx * rgb_W;

const size_t pixel_idx = color_row_idx * rgb_W + color_col_idx;
const size_t pixel_offset = color_idx * rgb_H * rgb_W + pixel_idx;

const float* rgb = color + pixel_offset * 3;
const float* diffuse = diffuse_intensity_map + pixel_offset;
const float* rast = rast_out + pixel_offset * 4;
float* out = output + pixel_offset * 3;

float diff = diffuse[0];

float is_foreground = clamp_func(rast[3], 0, 1);

float r = rgb[0] * (w_ambient + diff*w_diffuse) * is_foreground;
float g = rgb[1] * (w_ambient + diff*w_diffuse) * is_foreground;
float b = rgb[2] * (w_ambient + diff*w_diffuse) * is_foreground;

r = clamp_func(r, 0, 1);
g = clamp_func(g, 0, 1);
b = clamp_func(b, 0, 1);

out[0] = r;
out[1] = g;
out[2] = b;
}

void refine_color(cudaStream_t stream, const float* color, const float* diffuse_intensity_map, const float* rast_out, float* output,
int poses_num, float w_ambient, float w_diffuse, int rgb_H, int rgb_W)
{
dim3 blockSize = {32, 32};
dim3 gridSize = {ceil_div(rgb_W * poses_num, 32), ceil_div(rgb_H, 32)};

renfine_color_kernel<<<gridSize, blockSize, 0, stream>>>(
color, diffuse_intensity_map, rast_out, output, poses_num, w_ambient, w_diffuse, rgb_H, rgb_W
);
}

} // namespace foundationpose_render
13 changes: 12 additions & 1 deletion detection_6d_foundationpose/src/foundationpose_render.cu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ void rasterize(

void interpolate(
cudaStream_t stream, float* attr_ptr, float* rast_ptr, int32_t* tri_ptr, float* out, int num_vertices,
int num_triangles, int attr_dim, int H, int W, int C);
int num_triangles, int attr_shape_dim, int attr_dim, int H, int W, int C);

void texture(
cudaStream_t stream, float* tex_ptr, float* uv_ptr, float* out, int tex_height, int tex_width, int tex_channel,
Expand All @@ -71,6 +71,17 @@ void transform_points(cudaStream_t stream, const float* transform_matrixs, int t
void generate_pose_clip(cudaStream_t stream, const float* transform_matrixs, const float* bbox2d_matrix, int transform_num, const float* points_vectors,
int points_num, float* transformed_points_vectors, int rgb_H, int rgb_W);

/**
* @param transform_matrixs 应当是`Col-Major`的transform_num个4x4矩阵
* @param normals_vectors 应当是`normals_num`个3x1向量
* @param transformed_normal_vectors 这里直接输出归一化后的z方向分量,供 `transform_num x normals_num`个,即 [hyp-pose, H, W, 1]
*/
void transform_normals(cudaStream_t stream, const float* transform_matrixs, int transform_num, const float* normals_vectors,
int normals_num, float* transformed_normal_vectors);

void refine_color(cudaStream_t stream, const float* color, const float* diffuse_intensity_map, const float* rast, float* output,
int poses_num, float w_ambient, float w_diffuse, int rgb_H, int rgb_W);

} // namespace foundationpose_render

#endif // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_RENDER_CUDA_HPP_
7 changes: 7 additions & 0 deletions detection_6d_foundationpose/src/foundationpose_render.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ class FoundationPoseRenderer {
const std::vector<Eigen::MatrixXf>& tfs,
float* output_buffer) ;

bool TransformVertexNormalsOnCUDA(cudaStream_t stream,
const std::vector<Eigen::MatrixXf>& tfs,
float* output_buffer);

bool GeneratePoseClipOnCUDA(cudaStream_t stream,
float* output_buffer,
const std::vector<Eigen::MatrixXf>& poses,
Expand Down Expand Up @@ -121,13 +125,16 @@ class FoundationPoseRenderer {
using DeviceBufferUniquePtrType = std::unique_ptr<T, std::function<void(T*)>>;

DeviceBufferUniquePtrType<float> vertices_device_ {nullptr};
DeviceBufferUniquePtrType<float> vertex_normals_device_ {nullptr};
DeviceBufferUniquePtrType<float> texcoords_device_ {nullptr};
DeviceBufferUniquePtrType<int32_t> mesh_faces_device_ {nullptr};
DeviceBufferUniquePtrType<uint8_t> texture_map_device_ {nullptr};
// nvdiffrast render时相关缓存
DeviceBufferUniquePtrType<float> pose_clip_device_ {nullptr};
DeviceBufferUniquePtrType<float> rast_out_device_ {nullptr};
DeviceBufferUniquePtrType<float> pts_cam_device_ {nullptr};
DeviceBufferUniquePtrType<float> diffuse_intensity_device_ {nullptr};
DeviceBufferUniquePtrType<float> diffuse_intensity_map_device_ {nullptr};
DeviceBufferUniquePtrType<float> texcoords_out_device_ {nullptr};
DeviceBufferUniquePtrType<float> color_device_ {nullptr};
DeviceBufferUniquePtrType<float> xyz_map_device_ {nullptr};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ std::vector<Eigen::Matrix4f> MakeRotationGrid(unsigned int n_views = 40, int inp
auto R_inplane = Eigen::Affine3f::Identity();
R_inplane.rotate(Eigen::AngleAxisf(0, Eigen::Vector3f::UnitX()))
.rotate(Eigen::AngleAxisf(0, Eigen::Vector3f::UnitY()))
.rotate(Eigen::AngleAxisf(inplane_rot, Eigen::Vector3f::UnitZ()));
.rotate(Eigen::AngleAxisf(inplane_rot * M_PI / 180.0f, Eigen::Vector3f::UnitZ()));

cam_in_ob = cam_in_ob * R_inplane.matrix();
Eigen::Matrix4f ob_in_cam = cam_in_ob.inverse();
Expand Down
18 changes: 15 additions & 3 deletions detection_6d_foundationpose/src/foundationpose_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ TexturedMeshLoader::TexturedMeshLoader(const std::string& mesh_file_path,
// Walk through each of the mesh's vertices
for (unsigned int v = 0; v < mesh->mNumVertices; v++) {
vertices_.push_back(mesh->mVertices[v]);
vertex_normals_.push_back(mesh->mNormals[v]);
}
for (unsigned int i = 0 ; i < AI_MAX_NUMBER_OF_TEXTURECOORDS ; ++ i) {
if (mesh->mTextureCoords[i] != nullptr) {
Expand All @@ -167,12 +168,12 @@ TexturedMeshLoader::TexturedMeshLoader(const std::string& mesh_file_path,
LOG(INFO) << "Loading textured map file: " << textured_file_path;
texture_map_ = cv::imread(textured_file_path);
if (texture_map_.empty()) {
throw std::runtime_error("[TexturedMeshLoader] Failed to read textured image: "
+ textured_file_path);
// throw std::runtime_error("[TexturedMeshLoader] Failed to read textured image: "
// + textured_file_path);
texture_map_ = cv::Mat(2, 2, CV_8UC3, {100, 100, 100});
}
cv::cvtColor(texture_map_, texture_map_, cv::COLOR_BGR2RGB);


LOG(INFO) << "Successfully Loaded textured mesh file!!!";
LOG(INFO) << "Mesh has vertices_num: " << vertices_.size()
<< ", diameter: " << mesh_diamter_
Expand Down Expand Up @@ -217,6 +218,17 @@ TexturedMeshLoader::GetMeshVertices() const noexcept
return vertices_;
}

/**
* @brief 获取mesh模型顶点的法向量
*
* @return const std::vector<aiVector3D> &
*/
const std::vector<aiVector3D> &
TexturedMeshLoader::GetMeshVertexNormals() const noexcept
{
return vertex_normals_;
}

/**
* @brief 获取mesh模型的外观坐标系
*
Expand Down
Loading