From 0c7eba0b0ad46b8738f4380c39c2e8987ab38183 Mon Sep 17 00:00:00 2001 From: zz990099 <771647586@qq.com> Date: Mon, 24 Mar 2025 15:05:36 +0800 Subject: [PATCH 1/2] Fix[FoundationPoseSampling]: Fix rotation parameter unit mismatch in MakeRotationGrid Signed-off-by: zz990099 <771647586@qq.com> --- detection_6d_foundationpose/src/foundationpose_sampling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detection_6d_foundationpose/src/foundationpose_sampling.cpp b/detection_6d_foundationpose/src/foundationpose_sampling.cpp index 92931a6..ac98010 100644 --- a/detection_6d_foundationpose/src/foundationpose_sampling.cpp +++ b/detection_6d_foundationpose/src/foundationpose_sampling.cpp @@ -200,7 +200,7 @@ std::vector MakeRotationGrid(unsigned int n_views = 40, int inp auto R_inplane = Eigen::Affine3f::Identity(); R_inplane.rotate(Eigen::AngleAxisf(0, Eigen::Vector3f::UnitX())) .rotate(Eigen::AngleAxisf(0, Eigen::Vector3f::UnitY())) - .rotate(Eigen::AngleAxisf(inplane_rot, Eigen::Vector3f::UnitZ())); + .rotate(Eigen::AngleAxisf(inplane_rot * M_PI / 180.0f, Eigen::Vector3f::UnitZ())); cam_in_ob = cam_in_ob * R_inplane.matrix(); Eigen::Matrix4f ob_in_cam = cam_in_ob.inverse(); From 6a27646dbc1716e64a7db0d6452d8c903ee30515 Mon Sep 17 00:00:00 2001 From: zz990099 <771647586@qq.com> Date: Mon, 24 Mar 2025 15:06:35 +0800 Subject: [PATCH 2/2] feat[FoundationPose]: Support texture-free render and inference --- .../src/foundationpose_render.cpp | 88 +++++++++++++++++-- .../src/foundationpose_render.cu | 88 ++++++++++++++++++- .../src/foundationpose_render.cu.hpp | 13 ++- .../src/foundationpose_render.hpp | 7 ++ .../src/foundationpose_utils.cpp | 18 +++- .../src/foundationpose_utils.hpp | 8 ++ 6 files changed, 211 insertions(+), 11 deletions(-) diff --git a/detection_6d_foundationpose/src/foundationpose_render.cpp b/detection_6d_foundationpose/src/foundationpose_render.cpp index fe56a31..643334c 100644 --- a/detection_6d_foundationpose/src/foundationpose_render.cpp +++ b/detection_6d_foundationpose/src/foundationpose_render.cpp @@ -270,6 +270,8 @@ FoundationPoseRenderer::PrepareBuffer() // nvdiffrast render 用到的缓存以及渲染器 size_t pose_clip_size = num_vertices_ * (kVertexPoints + 1) * input_poses_num_ * sizeof(float); size_t pts_cam_size = num_vertices_ * kVertexPoints * input_poses_num_ * sizeof(float); + size_t diffuse_intensity_size = num_vertices_ * input_poses_num_ * sizeof(float); + size_t diffuse_intensity_map_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * sizeof(float); size_t rast_out_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * (kVertexPoints + 1) * sizeof(float); size_t color_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof(float); size_t xyz_map_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof(float); @@ -279,6 +281,8 @@ FoundationPoseRenderer::PrepareBuffer() float* _pose_clip_device; float* _rast_out_device; float* _pts_cam_device; + float* _diffuse_intensity_device; + float* _diffuse_intensity_map_device; float* _texcoords_out_device; float* _color_device; float* _xyz_map_device; @@ -307,6 +311,14 @@ FoundationPoseRenderer::PrepareBuffer() "[FoundationPoseRenderer] cudaMalloc `_pts_cam_device` FAILED!!!"); pts_cam_device_ = DeviceBufferUniquePtrType(_pts_cam_device, CudaMemoryDeleter()); + CHECK_CUDA(cudaMalloc(&_diffuse_intensity_device, diffuse_intensity_size), + "[FoundationPoseRenderer] cudaMalloc `_diffuse_intensity_device` FAILED!!!"); + diffuse_intensity_device_ = DeviceBufferUniquePtrType(_diffuse_intensity_device, CudaMemoryDeleter()); + + CHECK_CUDA(cudaMalloc(&_diffuse_intensity_map_device, diffuse_intensity_map_size), + "[FoundationPoseRenderer] cudaMalloc `_diffuse_intensity_map_device` FAILED!!!"); + diffuse_intensity_map_device_ = DeviceBufferUniquePtrType(_diffuse_intensity_map_device, CudaMemoryDeleter()); + CHECK_CUDA(cudaMalloc(&_texcoords_out_device, texcoords_out_size), "[FoundationPoseRenderer] cudaMalloc `_texcoords_out_device` FAILED!!!"); texcoords_out_device_ = DeviceBufferUniquePtrType(_texcoords_out_device, CudaMemoryDeleter()); @@ -361,11 +373,13 @@ FoundationPoseRenderer::LoadTexturedMesh() { const auto& mesh_model_center = mesh_loader_->GetMeshModelCenter(); const auto& mesh_vertices = mesh_loader_->GetMeshVertices(); + const auto& mesh_vertex_normals = mesh_loader_->GetMeshVertexNormals(); const auto& mesh_texcoords = mesh_loader_->GetMeshTextureCoords(); const auto& mesh_faces = mesh_loader_->GetMeshFaces(); const auto& rgb_texture_map = mesh_loader_->GetTextureMap(); mesh_diameter_ = mesh_loader_->GetMeshDiameter(); + std::vector vertex_normals; // Walk through each of the mesh's vertices for (unsigned int v = 0; v < mesh_vertices.size(); v++) { @@ -373,6 +387,10 @@ FoundationPoseRenderer::LoadTexturedMesh() vertices_.push_back(mesh_vertices[v].y - mesh_model_center[1]); vertices_.push_back(mesh_vertices[v].z - mesh_model_center[2]); + vertex_normals.push_back(mesh_vertex_normals[v].x); + vertex_normals.push_back(mesh_vertex_normals[v].y); + vertex_normals.push_back(mesh_vertex_normals[v].z); + // Check if the mesh has texture coordinates if (mesh_texcoords.size() >= 1) { texcoords_.push_back(mesh_texcoords[0][v].x); @@ -422,6 +440,7 @@ FoundationPoseRenderer::LoadTexturedMesh() size_t texcoords_size = texcoords_.size() * sizeof(float); float* _vertices_device; + float* _vertex_normals_device; float* _texcoords_device; int32_t* _mesh_faces_device; uint8_t* _texture_map_device; @@ -430,6 +449,10 @@ FoundationPoseRenderer::LoadTexturedMesh() "[FoundationposeRender] cudaMalloc `mesh_faces_device` FAILED!!!"); vertices_device_ = DeviceBufferUniquePtrType(_vertices_device, CudaMemoryDeleter()); + CHECK_CUDA(cudaMalloc(&_vertex_normals_device, vertices_size), + "[FoundationposeRender] cudaMalloc `vertex_normals_device` FAILED!!!"); + vertex_normals_device_ = DeviceBufferUniquePtrType(_vertex_normals_device, CudaMemoryDeleter()); + CHECK_CUDA(cudaMalloc(&_mesh_faces_device, faces_size), "[FoundationposeRender] cudaMalloc `mesh_faces_device` FAILED!!!"); mesh_faces_device_ = DeviceBufferUniquePtrType(_mesh_faces_device, CudaMemoryDeleter()); @@ -442,9 +465,14 @@ FoundationPoseRenderer::LoadTexturedMesh() "[FoundationposeRender] cudaMalloc `texture_map_device_` FAILED!!!"); texture_map_device_ = DeviceBufferUniquePtrType(_texture_map_device, CudaMemoryDeleter()); - CHECK_CUDA(cudaMemcpy(vertices_device_.get(), - vertices_.data(), - vertices_size, + CHECK_CUDA(cudaMemcpy(vertices_device_.get(), + vertices_.data(), + vertices_size, + cudaMemcpyHostToDevice), + "[FoundationposeRender] cudaMemcpy mesh_faces_host -> mesh_faces_device FAILED!!!"); + CHECK_CUDA(cudaMemcpy(vertex_normals_device_.get(), + vertex_normals.data(), + vertices_size, cudaMemcpyHostToDevice), "[FoundationposeRender] cudaMemcpy mesh_faces_host -> mesh_faces_device FAILED!!!"); CHECK_CUDA(cudaMemcpy(mesh_faces_device_.get(), @@ -514,6 +542,36 @@ bool FoundationPoseRenderer::TransformVerticesOnCUDA(cudaStream_t stream, return true; } +bool FoundationPoseRenderer::TransformVertexNormalsOnCUDA(cudaStream_t stream, + const std::vector& tfs, + float* output_buffer) +{ + // Get the dimensions of the inputs + int tfs_size = tfs.size(); + CHECK_STATE(tfs_size != 0, + "[FoundationposeRender] The transfomation matrix is empty! "); + + CHECK_STATE(tfs[0].cols() == tfs[0].rows(), + "[FoundationposeRender] The transfomation matrix has different rows and cols! "); + + const int total_elements = tfs[0].cols() * tfs[0].rows(); + + float* transform_device_buffer_ = nullptr; + cudaMallocAsync(&transform_device_buffer_, tfs_size * total_elements * sizeof(float), stream); + + for (int i = 0 ; i < tfs_size ; ++ i) { + cudaMemcpyAsync(transform_device_buffer_ + i * total_elements, + tfs[i].data(), + total_elements * sizeof(float), + cudaMemcpyHostToDevice, + stream); + } + + foundationpose_render::transform_normals(stream, transform_device_buffer_, tfs_size, vertex_normals_device_.get(), num_vertices_, output_buffer); + + cudaFreeAsync(transform_device_buffer_, stream); + return true; +} bool FoundationPoseRenderer::GeneratePoseClipOnCUDA(cudaStream_t stream, float* output_buffer, @@ -595,7 +653,7 @@ FoundationPoseRenderer::NvdiffrastRender(cudaStream_t cuda_stream, foundationpose_render::interpolate( cuda_stream, pts_cam_device_.get(), rast_out_device_.get(), mesh_faces_device_.get(), xyz_map_device_.get(), - num_vertices_, num_faces_, kVertexPoints, + num_vertices_, num_faces_, 3, kVertexPoints, H, W, N); CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] interpolate failed!!!"); @@ -603,7 +661,7 @@ FoundationPoseRenderer::NvdiffrastRender(cudaStream_t cuda_stream, foundationpose_render::interpolate( cuda_stream, texcoords_device_.get(), rast_out_device_.get(), mesh_faces_device_.get(), texcoords_out_device_.get(), - num_vertices_, num_faces_, kTexcoordsDim, + num_vertices_, num_faces_, 2, kTexcoordsDim, H, W, N); CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] interpolate failed!!!"); @@ -619,6 +677,26 @@ FoundationPoseRenderer::NvdiffrastRender(cudaStream_t cuda_stream, CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] texture failed!!!"); + CHECK_STATE(TransformVertexNormalsOnCUDA(cuda_stream, poses, diffuse_intensity_device_.get()), + "[FoundationPoseRenderer] Transform vertex normals failed!!!"); + + foundationpose_render::interpolate(cuda_stream, + diffuse_intensity_device_.get(), + rast_out_device_.get(), + mesh_faces_device_.get(), + diffuse_intensity_map_device_.get(), + num_vertices_, num_faces_, 3, 1, H, W, N); + CHECK_CUDA(cudaGetLastError(), + "[FoundationPoseRenderer] interpolate failed!!!"); + + foundationpose_render::refine_color(cuda_stream, color_device_.get(), + diffuse_intensity_map_device_.get(), + rast_out_device_.get(), + color_device_.get(), + poses.size(), 0.8, 0.5, H, W); + CHECK_CUDA(cudaGetLastError(), + "[FoundationPoseRenderer] refine_color failed!!!"); + float min_value = 0.0; float max_value = 1.0; foundationpose_render::clamp(cuda_stream, color_device_.get(), min_value, max_value, N * H * W * kNumChannels); diff --git a/detection_6d_foundationpose/src/foundationpose_render.cu b/detection_6d_foundationpose/src/foundationpose_render.cu index c11fd2a..28f73e8 100644 --- a/detection_6d_foundationpose/src/foundationpose_render.cu +++ b/detection_6d_foundationpose/src/foundationpose_render.cu @@ -196,8 +196,8 @@ void rasterize( void interpolate( cudaStream_t stream, float* attr_ptr, float* rast_ptr, int32_t* tri_ptr, float* out, int num_vertices, - int num_triangles, int attr_dim, int H, int W, int C) { - int instance_mode = attr_dim > 2 ? 1 : 0; + int num_triangles, int attr_shape_dim, int attr_dim, int H, int W, int C) { + int instance_mode = attr_shape_dim > 2 ? 1 : 0; InterpolateKernelParams p = {}; // Initialize all fields to zero. p.instance_mode = instance_mode; @@ -339,4 +339,88 @@ void generate_pose_clip(cudaStream_t stream, const float* transform_matrixs, con transform_matrixs, bbox2d_matrix, M, points_vectors, N, transformed_points_vectors, rgb_H, rgb_W); } + +__global__ void transform_normals_kernel( + const float* transform_matrixs, int M, const float* normals_vectors, + int N, float* transformed_normal_vectors) +{ + int row_idx = threadIdx.y + blockIdx.y * blockDim.y; + int col_idx = threadIdx.x + blockIdx.x * blockDim.x; + if (row_idx >= M || col_idx >= N) return; + + const float* matrix = transform_matrixs + row_idx * 16; // 指向当前 4x4 变换矩阵 + const float* normal = normals_vectors + col_idx * 3; // 指向当前 normal 向量 + float* transformed_normal = transformed_normal_vectors + (row_idx * N + col_idx); + + float x = normal[0], y = normal[1], z = normal[2]; + // **Column-Major 访问方式** + float tx = matrix[0] * x + matrix[4] * y + matrix[8] * z; + float ty = matrix[1] * x + matrix[5] * y + matrix[9] * z; + float tz = matrix[2] * x + matrix[6] * y + matrix[10] * z; + // 只保留z方向的分量,取反 + float l2 = sqrt(tx*tx + ty*ty + tz*tz); + float value = l2 == 0 ? 0 : - tz / l2; + value = clamp_func(value, 0, 1); + transformed_normal[0] = value; +} + +void transform_normals(cudaStream_t stream, const float* transform_matrixs, int M, const float* normals_vectors, + int N, float* transformed_normal_vectors) +{ + dim3 blockSize = {32, 32}; + dim3 gridSize = {ceil_div(N, 32), ceil_div(M, 32)}; + + transform_normals_kernel<<>>( + transform_matrixs, M, normals_vectors, N, transformed_normal_vectors); +} + + +__global__ void renfine_color_kernel( + const float* color, const float* diffuse_intensity_map, const float* rast_out, float* output, int poses_num, float w_ambient, + float w_diffuse, int rgb_H, int rgb_W) +{ + int row_idx = threadIdx.y + blockIdx.y * blockDim.y; + int col_idx = threadIdx.x + blockIdx.x * blockDim.x; + if (row_idx >= rgb_H || col_idx >= rgb_W * poses_num) return; + + const int color_idx = col_idx / rgb_W; + const int color_row_idx = row_idx; + const int color_col_idx = col_idx - color_idx * rgb_W; + + const size_t pixel_idx = color_row_idx * rgb_W + color_col_idx; + const size_t pixel_offset = color_idx * rgb_H * rgb_W + pixel_idx; + + const float* rgb = color + pixel_offset * 3; + const float* diffuse = diffuse_intensity_map + pixel_offset; + const float* rast = rast_out + pixel_offset * 4; + float* out = output + pixel_offset * 3; + + float diff = diffuse[0]; + + float is_foreground = clamp_func(rast[3], 0, 1); + + float r = rgb[0] * (w_ambient + diff*w_diffuse) * is_foreground; + float g = rgb[1] * (w_ambient + diff*w_diffuse) * is_foreground; + float b = rgb[2] * (w_ambient + diff*w_diffuse) * is_foreground; + + r = clamp_func(r, 0, 1); + g = clamp_func(g, 0, 1); + b = clamp_func(b, 0, 1); + + out[0] = r; + out[1] = g; + out[2] = b; +} + +void refine_color(cudaStream_t stream, const float* color, const float* diffuse_intensity_map, const float* rast_out, float* output, + int poses_num, float w_ambient, float w_diffuse, int rgb_H, int rgb_W) +{ + dim3 blockSize = {32, 32}; + dim3 gridSize = {ceil_div(rgb_W * poses_num, 32), ceil_div(rgb_H, 32)}; + + renfine_color_kernel<<>>( + color, diffuse_intensity_map, rast_out, output, poses_num, w_ambient, w_diffuse, rgb_H, rgb_W + ); +} + } // namespace foundationpose_render \ No newline at end of file diff --git a/detection_6d_foundationpose/src/foundationpose_render.cu.hpp b/detection_6d_foundationpose/src/foundationpose_render.cu.hpp index b1ae91a..e67fa69 100644 --- a/detection_6d_foundationpose/src/foundationpose_render.cu.hpp +++ b/detection_6d_foundationpose/src/foundationpose_render.cu.hpp @@ -48,7 +48,7 @@ void rasterize( void interpolate( cudaStream_t stream, float* attr_ptr, float* rast_ptr, int32_t* tri_ptr, float* out, int num_vertices, - int num_triangles, int attr_dim, int H, int W, int C); + int num_triangles, int attr_shape_dim, int attr_dim, int H, int W, int C); void texture( cudaStream_t stream, float* tex_ptr, float* uv_ptr, float* out, int tex_height, int tex_width, int tex_channel, @@ -71,6 +71,17 @@ void transform_points(cudaStream_t stream, const float* transform_matrixs, int t void generate_pose_clip(cudaStream_t stream, const float* transform_matrixs, const float* bbox2d_matrix, int transform_num, const float* points_vectors, int points_num, float* transformed_points_vectors, int rgb_H, int rgb_W); +/** + * @param transform_matrixs 应当是`Col-Major`的transform_num个4x4矩阵 + * @param normals_vectors 应当是`normals_num`个3x1向量 + * @param transformed_normal_vectors 这里直接输出归一化后的z方向分量,供 `transform_num x normals_num`个,即 [hyp-pose, H, W, 1] + */ +void transform_normals(cudaStream_t stream, const float* transform_matrixs, int transform_num, const float* normals_vectors, + int normals_num, float* transformed_normal_vectors); + +void refine_color(cudaStream_t stream, const float* color, const float* diffuse_intensity_map, const float* rast, float* output, + int poses_num, float w_ambient, float w_diffuse, int rgb_H, int rgb_W); + } // namespace foundationpose_render #endif // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_RENDER_CUDA_HPP_ \ No newline at end of file diff --git a/detection_6d_foundationpose/src/foundationpose_render.hpp b/detection_6d_foundationpose/src/foundationpose_render.hpp index d1c6f97..fb55266 100644 --- a/detection_6d_foundationpose/src/foundationpose_render.hpp +++ b/detection_6d_foundationpose/src/foundationpose_render.hpp @@ -64,6 +64,10 @@ class FoundationPoseRenderer { const std::vector& tfs, float* output_buffer) ; + bool TransformVertexNormalsOnCUDA(cudaStream_t stream, + const std::vector& tfs, + float* output_buffer); + bool GeneratePoseClipOnCUDA(cudaStream_t stream, float* output_buffer, const std::vector& poses, @@ -121,6 +125,7 @@ class FoundationPoseRenderer { using DeviceBufferUniquePtrType = std::unique_ptr>; DeviceBufferUniquePtrType vertices_device_ {nullptr}; + DeviceBufferUniquePtrType vertex_normals_device_ {nullptr}; DeviceBufferUniquePtrType texcoords_device_ {nullptr}; DeviceBufferUniquePtrType mesh_faces_device_ {nullptr}; DeviceBufferUniquePtrType texture_map_device_ {nullptr}; @@ -128,6 +133,8 @@ class FoundationPoseRenderer { DeviceBufferUniquePtrType pose_clip_device_ {nullptr}; DeviceBufferUniquePtrType rast_out_device_ {nullptr}; DeviceBufferUniquePtrType pts_cam_device_ {nullptr}; + DeviceBufferUniquePtrType diffuse_intensity_device_ {nullptr}; + DeviceBufferUniquePtrType diffuse_intensity_map_device_ {nullptr}; DeviceBufferUniquePtrType texcoords_out_device_ {nullptr}; DeviceBufferUniquePtrType color_device_ {nullptr}; DeviceBufferUniquePtrType xyz_map_device_ {nullptr}; diff --git a/detection_6d_foundationpose/src/foundationpose_utils.cpp b/detection_6d_foundationpose/src/foundationpose_utils.cpp index 8333d46..43323ba 100644 --- a/detection_6d_foundationpose/src/foundationpose_utils.cpp +++ b/detection_6d_foundationpose/src/foundationpose_utils.cpp @@ -148,6 +148,7 @@ TexturedMeshLoader::TexturedMeshLoader(const std::string& mesh_file_path, // Walk through each of the mesh's vertices for (unsigned int v = 0; v < mesh->mNumVertices; v++) { vertices_.push_back(mesh->mVertices[v]); + vertex_normals_.push_back(mesh->mNormals[v]); } for (unsigned int i = 0 ; i < AI_MAX_NUMBER_OF_TEXTURECOORDS ; ++ i) { if (mesh->mTextureCoords[i] != nullptr) { @@ -167,12 +168,12 @@ TexturedMeshLoader::TexturedMeshLoader(const std::string& mesh_file_path, LOG(INFO) << "Loading textured map file: " << textured_file_path; texture_map_ = cv::imread(textured_file_path); if (texture_map_.empty()) { - throw std::runtime_error("[TexturedMeshLoader] Failed to read textured image: " - + textured_file_path); + // throw std::runtime_error("[TexturedMeshLoader] Failed to read textured image: " + // + textured_file_path); + texture_map_ = cv::Mat(2, 2, CV_8UC3, {100, 100, 100}); } cv::cvtColor(texture_map_, texture_map_, cv::COLOR_BGR2RGB); - LOG(INFO) << "Successfully Loaded textured mesh file!!!"; LOG(INFO) << "Mesh has vertices_num: " << vertices_.size() << ", diameter: " << mesh_diamter_ @@ -217,6 +218,17 @@ TexturedMeshLoader::GetMeshVertices() const noexcept return vertices_; } +/** + * @brief 获取mesh模型顶点的法向量 + * + * @return const std::vector & + */ +const std::vector & +TexturedMeshLoader::GetMeshVertexNormals() const noexcept +{ + return vertex_normals_; +} + /** * @brief 获取mesh模型的外观坐标系 * diff --git a/detection_6d_foundationpose/src/foundationpose_utils.hpp b/detection_6d_foundationpose/src/foundationpose_utils.hpp index dd6de6a..b8943db 100644 --- a/detection_6d_foundationpose/src/foundationpose_utils.hpp +++ b/detection_6d_foundationpose/src/foundationpose_utils.hpp @@ -68,6 +68,13 @@ class TexturedMeshLoader { */ const std::vector & GetMeshVertices() const noexcept; + /** + * @brief 获取mesh模型顶点的法向量 + * + * @return const std::vector & + */ + const std::vector & GetMeshVertexNormals() const noexcept; + /** * @brief 获取mesh模型的外观坐标系 * @@ -114,6 +121,7 @@ class TexturedMeshLoader { float mesh_diamter_; Eigen::Vector3f mesh_center_; std::vector vertices_; + std::vector vertex_normals_; std::vector> texcoords_; std::vector faces_; Eigen::Matrix4f obb_;