From 239d42db574a457fb883d1de4bf16d9de74117dd Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Wed, 30 Nov 2022 11:14:40 +0000 Subject: [PATCH 01/14] Update Inpaint pipeline --- .../cpp/pipeline_stable_diffusion_inpaint.cc | 67 +++++++++++++++++++ .../cpp/pipeline_stable_diffusion_inpaint.h | 57 ++++++++++++++++ .../stable_diffusion/cpp/scheduler.h | 1 + 3 files changed, 125 insertions(+) create mode 100644 examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc create mode 100644 examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc new file mode 100644 index 00000000000..5d4c7b81cda --- /dev/null +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "pipeline_stable_diffusion_inpaint.h" + +using namespace paddlenlp; + +namespace fastdeploy { + +StableDiffusionInpaintPipeline::StableDiffusionInpaintPipeline( + std::unique_ptr vae_encoder, std::unique_ptr vae_decoder, + std::unique_ptr text_encoder, std::unique_ptr unet, + std::unique_ptr scheduler, + const paddlenlp::fast_tokenizer::tokenizers_impl::ClipFastTokenizer& + tokenizer) + : vae_encoder_(std::move(vae_encoder)), + vae_decoder_(std::move(vae_decoder)), + text_encoder_(std::move(text_encoder)), unet_(std::move(unet)), + scheduler_(std::move(scheduler)), tokenizer_(tokenizer) {} + +void StableDiffusionInpaintPipeline::Predict( + const std::vector& prompts, cv::Mat* image, + cv::Mat* mask_image, FDTensor* output_image, int height, int width, + int num_inference_steps, float guidance_scale, + const std::vector& negative_prompt, int num_images_per_prompt, + float eta, uint32_t max_length, const FDTensor* latents, + callback_ptr callback, int callback_steps) { + int batch_size = prompts.size(); + FDASSERT(batch_size >= 1, "prompts should not be empty"); + FDASSERT( + height % 8 != 0 or width % 8 != 0, + "`height` and `width` have to be divisible by 8 but are {%d} and {%d}.", + height, width); + FDASSERT(callback_steps <= 0, + "`callback_steps` has to be a positive integer but is {%d}", + callback_steps); + + scheduler_->SetTimesteps(num_inference_steps); + + // Setting tokenizer attr + if (max_length == 0) { + tokenizer_.EnablePadMethod(fast_tokenizer::core::RIGHT, + tokenizer_.GetPadTokenId(), 0, + tokenizer_.GetPadToken(), nullptr, nullptr); + tokenizer_.DisableTruncMethod(); + } else { + tokenizer_.EnablePadMethod(fast_tokenizer::core::RIGHT, + tokenizer_.GetPadTokenId(), 0, + tokenizer_.GetPadToken(), &max_length, nullptr); + tokenizer_.EnableTruncMethod(max_length, 0, fast_tokenizer::core::RIGHT, + fast_tokenizer::core::LONGEST_FIRST); + } + std::vector encodings; + tokenizer_.EncodeBatchStrings(prompts, &encodings); +} +} // namespace fastdeploy diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h new file mode 100644 index 00000000000..04ad288adff --- /dev/null +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "./scheduler.h" +#include "fast_tokenizer/tokenizers/clip_fast_tokenizer.h" +#include "fastdeploy/core/fd_tensor.h" +#include "fastdeploy/runtime.h" +#include "opencv2/core/core.hpp" +#include +#include +#include + +namespace fastdeploy { + +class StableDiffusionInpaintPipeline { + public: + typedef void (*callback_ptr)(int, int, FDTensor*); + + StableDiffusionInpaintPipeline( + std::unique_ptr vae_encoder, + std::unique_ptr vae_decoder, + std::unique_ptr text_encoder, std::unique_ptr unet, + std::unique_ptr scheduler, + const paddlenlp::fast_tokenizer::tokenizers_impl::ClipFastTokenizer& + tokenizer); + void Predict(const std::vector& prompts, cv::Mat* image, + cv::Mat* mask_image, FDTensor* output_image, int height = 512, + int width = 512, int num_inference_steps = 50, + float guidance_scale = 7.5, + const std::vector& negative_prompt = {}, + int num_images_per_prompt = 1, float eta = 0.0, + uint32_t max_length = 77, const FDTensor* latents = nullptr, + callback_ptr callback = nullptr, int callback_steps = 1); + + private: + std::unique_ptr vae_encoder_; + std::unique_ptr vae_decoder_; + std::unique_ptr text_encoder_; + std::unique_ptr unet_; + std::unique_ptr scheduler_; + paddlenlp::fast_tokenizer::tokenizers_impl::ClipFastTokenizer tokenizer_; +}; + +} // namespace fastdeploy diff --git a/examples/multimodal/stable_diffusion/cpp/scheduler.h b/examples/multimodal/stable_diffusion/cpp/scheduler.h index 6a5cd2fed9c..6933fe3c87a 100644 --- a/examples/multimodal/stable_diffusion/cpp/scheduler.h +++ b/examples/multimodal/stable_diffusion/cpp/scheduler.h @@ -19,6 +19,7 @@ namespace fastdeploy { class Scheduler { + public: virtual void SetTimesteps(int num_inference_steps) = 0; virtual void Step(const FDTensor& model_output, int timestep, const FDTensor& sample, FDTensor* prev_sample) = 0; From 9dbbaa3bedd16888014e15e8fb961831b943276c Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Wed, 30 Nov 2022 17:45:51 +0000 Subject: [PATCH 02/14] Update concat --- .../cpp/pipeline_stable_diffusion_inpaint.cc | 55 +++++++++++++++++++ fastdeploy/function/concat.cc | 8 ++- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index 5d4c7b81cda..077d96c7edf 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "pipeline_stable_diffusion_inpaint.h" +#include "fastdeploy/function/functions.h" using namespace paddlenlp; @@ -63,5 +64,59 @@ void StableDiffusionInpaintPipeline::Predict( } std::vector encodings; tokenizer_.EncodeBatchStrings(prompts, &encodings); + + std::vector input_ids; + for (auto& encoding : encodings) { + auto curr_ids = encoding.GetIds(); + input_ids.insert(input_ids.end(), curr_ids.begin(), curr_ids.end()); + } + encodings.clear(); + // Get text encoder output + FDTensor text_intput_ids; + std::vector text_inputs(1); + text_inputs[0].SetExternalData({batch_size, max_length}, FDDataType::INT64, + input_ids.data()); + + TensorInfo text_info = text_encoder_->GetInputInfo(0); + text_inputs[0].name = text_info.name; + int output_size = text_encoder_->GetOutputInfos().size(); + std::vector text_outputs(output_size); + text_encoder_->Infer(text_inputs, &text_outputs); + + FDTensor text_embeddings; + function::Tile(text_outputs[0], {num_images_per_prompt, 1, 1}, + &text_embeddings); + + // here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + // of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + // corresponds to doing no classifier free guidance. + bool do_classifier_free_guidance = guidance_scale > 1.0; + if (do_classifier_free_guidance) { + std::vector uncond_tokens; + if (negative_prompt.size() == 0) { + uncond_tokens = {""}; + } else if (negative_prompt.size() != batch_size) { + FDASSERT(false, + "negative_prompt has batch size %d, but prompt has batch size " + "%d. Please make sure that passed `negative_prompt` matches the " + "batch size of `prompt`.", + prompts.size(), negative_prompt.size()); + } else { + uncond_tokens = negative_prompt; + } + tokenizer_.EncodeBatchStrings(uncond_tokens, &encodings); + input_ids.clear(); + for (auto& encoding : encodings) { + auto curr_ids = encoding.GetIds(); + input_ids.insert(input_ids.end(), curr_ids.begin(), curr_ids.end()); + } + text_inputs[0].SetExternalData({batch_size, max_length}, FDDataType::INT64, + input_ids.data()); + text_encoder_->Infer(text_inputs, &text_outputs); + FDTensor uncond_embeddings; + function::Tile(text_outputs[0], {num_images_per_prompt, 1, 1}, + &uncond_embeddings); + function::Concat({uncond_embeddings, text_embeddings}, &text_embeddings); + } } } // namespace fastdeploy diff --git a/fastdeploy/function/concat.cc b/fastdeploy/function/concat.cc index 295c3c25a43..4f07743942d 100644 --- a/fastdeploy/function/concat.cc +++ b/fastdeploy/function/concat.cc @@ -88,11 +88,13 @@ template void ConcatKernel(const std::vector& input, FDTensor* output, int axis) { auto output_shape = ComputeAndCheckConcatOutputShape(input, axis); - output->Resize(output_shape, TypeToDataType::dtype, output->name, - input[0].device); + FDTensor output_tmp; + output_tmp.Resize(output_shape, TypeToDataType::dtype, output->name, + input[0].device); ConcatFunctor functor; - functor(input, axis, output); + functor(input, axis, &output_tmp); + *output = std::move(output_tmp); } void Concat(const std::vector& x, FDTensor* out, int axis) { From 895e5d009449451fae88bb78cf4a410567b0e370 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 1 Dec 2022 03:32:11 +0000 Subject: [PATCH 03/14] Add GaussianRandomKernel --- .../cpp/pipeline_stable_diffusion_inpaint.cc | 16 +++++++ fastdeploy/function/gaussian_random.cc | 46 +++++++++++++++++++ fastdeploy/function/gaussian_random.h | 36 +++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 fastdeploy/function/gaussian_random.cc create mode 100644 fastdeploy/function/gaussian_random.h diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index 077d96c7edf..04fd6f78aaa 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -14,11 +14,15 @@ #include "pipeline_stable_diffusion_inpaint.h" #include "fastdeploy/function/functions.h" +#include using namespace paddlenlp; namespace fastdeploy { +static constexpr int NUM_LATENT_CHANNELS = 4; +static constexpr int NUM_UNET_INPUT_CHANNELS = 9; + StableDiffusionInpaintPipeline::StableDiffusionInpaintPipeline( std::unique_ptr vae_encoder, std::unique_ptr vae_decoder, std::unique_ptr text_encoder, std::unique_ptr unet, @@ -118,5 +122,17 @@ void StableDiffusionInpaintPipeline::Predict( &uncond_embeddings); function::Concat({uncond_embeddings, text_embeddings}, &text_embeddings); } + std::vector latents_shape = {batch_size * num_images_per_prompt, + NUM_LATENT_CHANNELS, height / 8, + width / 8}; + auto latents_dtype = text_embeddings.Dtype(); + if (latents == nullptr) { + + } else if { + bool result = std::equals(latents_shape.begin(), latents_shape.end(), + latents->Shape().begin()); + FDASSERT(result, "Unexpected latents shape, got %s, expected %s", + Str(latents_shape).c_str(), Str(latents->Shape()).c_str()); + } } } // namespace fastdeploy diff --git a/fastdeploy/function/gaussian_random.cc b/fastdeploy/function/gaussian_random.cc new file mode 100644 index 00000000000..18657c4f2a3 --- /dev/null +++ b/fastdeploy/function/gaussian_random.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/function/gaussian_random.h" +#include +#include +#include + +namespace fastdeploy { +namespace function { + +template +void GaussianRandomKernel(const std::vector& shape, float mean, + float std, int seed, FDTensor* out) { + std::normal_distribution dist(mean, std); + + out->Allocate(shape, TypeToDataType::dtype); + int64_t size = out->Numel(); + T* data = reinterpret_cast(out->Data()); + std::mt19937_64 engine; + engine.seed(seed); + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(engine); + } +} + +void GaussianRandom(const std::vector& shape, FDTensor* out, + FDDataType dtype, float mean, float std, int seed) { + FD_VISIT_FLOAT_TYPES(dtype, "GaussianRandomKernel", [&]() { + GaussianRandomKernel(shape, mean, std, seed, out); + }); +} + +} // namespace function +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/function/gaussian_random.h b/fastdeploy/function/gaussian_random.h new file mode 100644 index 00000000000..85a4ff8a638 --- /dev/null +++ b/fastdeploy/function/gaussian_random.h @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/core/fd_tensor.h" + +namespace fastdeploy { +namespace function { + +/** Output is obtained by gathering entries of axis of x indexed by index and + * concatenate them together. + @param shape The output tensor shape. + @param out the output tensor. + @param mean mean value of gaussian random + @param std standard value of gaussian random + @param seed The seed of random generator. + @param dtype The data type of the output Tensor. +*/ +void GaussianRandom(const std::vector& shape, FDTensor* out, + FDDataType dtype = FDDataType::FP32, float mean = 0.0f, + float std = 1.0f, int seed = 0); + +} // namespace function +} // namespace fastdeploy From 97179617513806e11bfc9eef21f6b6579e0a370f Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 1 Dec 2022 03:44:23 +0000 Subject: [PATCH 04/14] Update GaussianRandom --- .../cpp/pipeline_stable_diffusion_inpaint.cc | 8 +++++--- fastdeploy/function/functions.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index 04fd6f78aaa..25f0ab5bd78 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -126,13 +126,15 @@ void StableDiffusionInpaintPipeline::Predict( NUM_LATENT_CHANNELS, height / 8, width / 8}; auto latents_dtype = text_embeddings.Dtype(); + FDTensor actual_latents; if (latents == nullptr) { - - } else if { - bool result = std::equals(latents_shape.begin(), latents_shape.end(), + function::GaussianRandom(latents_shape, &actual_latents, latents_dtype); + } else { + bool result = std::equal(latents_shape.begin(), latents_shape.end(), latents->Shape().begin()); FDASSERT(result, "Unexpected latents shape, got %s, expected %s", Str(latents_shape).c_str(), Str(latents->Shape()).c_str()); + actual_latents = *latents; } } } // namespace fastdeploy diff --git a/fastdeploy/function/functions.h b/fastdeploy/function/functions.h index d2ffe6a0c1d..a43407839fa 100644 --- a/fastdeploy/function/functions.h +++ b/fastdeploy/function/functions.h @@ -21,6 +21,7 @@ #include "fastdeploy/function/elementwise.h" #include "fastdeploy/function/full.h" #include "fastdeploy/function/gather_scatter_along_axis.h" +#include "fastdeploy/function/gaussian_random.h" #include "fastdeploy/function/isfinite.h" #include "fastdeploy/function/linspace.h" #include "fastdeploy/function/math.h" From 4fcaa549c8df8345c6151ed2ba60fdbf5f477ff5 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 1 Dec 2022 06:49:25 +0000 Subject: [PATCH 05/14] Add vae endoder --- .../cpp/dpm_solver_multistep_scheduler.cc | 2 + .../cpp/dpm_solver_multistep_scheduler.h | 1 + .../cpp/pipeline_stable_diffusion_inpaint.cc | 80 +++++++++++++++---- .../cpp/pipeline_stable_diffusion_inpaint.h | 3 + .../stable_diffusion/cpp/scheduler.h | 1 + fastdeploy/function/tile.cc | 5 +- 6 files changed, 75 insertions(+), 17 deletions(-) diff --git a/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc b/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc index cb6cf970b7b..8b96d836664 100644 --- a/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc +++ b/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc @@ -96,6 +96,8 @@ DPMSolverMultistepScheduler::DPMSolverMultistepScheduler( lower_order_nums_ = 0; } +float DPMSolverMultistepScheduler::InitNoiseSigma() { return 1.0; } + void DPMSolverMultistepScheduler::ConvertModelOutput( const FDTensor& model_output, int timestep, const FDTensor& sample, FDTensor* out) { diff --git a/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.h b/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.h index c6f037feead..6924eb80278 100644 --- a/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.h +++ b/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.h @@ -54,6 +54,7 @@ class DPMSolverMultistepScheduler : public Scheduler { const std::vector& timesteps = {}) override; void AddNoise(const FDTensor& original_samples, const FDTensor& noise, const FDTensor& timesteps, FDTensor* out) override; + float InitNoiseSigma() override; struct Config { int num_train_timesteps_; float beta_start_; diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index 25f0ab5bd78..fc401d4a203 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -23,6 +23,10 @@ namespace fastdeploy { static constexpr int NUM_LATENT_CHANNELS = 4; static constexpr int NUM_UNET_INPUT_CHANNELS = 9; +void StableDiffusionInpaintPipeline::PrepareMaskAndMaskedImage( + cv::Mat* image, cv::Mat* mask_mat, const std::vector& shape, + FDTensor* mask, FDTensor* mask_image) {} + StableDiffusionInpaintPipeline::StableDiffusionInpaintPipeline( std::unique_ptr vae_encoder, std::unique_ptr vae_decoder, std::unique_ptr text_encoder, std::unique_ptr unet, @@ -77,23 +81,22 @@ void StableDiffusionInpaintPipeline::Predict( encodings.clear(); // Get text encoder output FDTensor text_intput_ids; - std::vector text_inputs(1); - text_inputs[0].SetExternalData({batch_size, max_length}, FDDataType::INT64, - input_ids.data()); + std::vector inputs(1); + inputs[0].SetExternalData({batch_size, max_length}, FDDataType::INT64, + input_ids.data()); TensorInfo text_info = text_encoder_->GetInputInfo(0); - text_inputs[0].name = text_info.name; + inputs[0].name = text_info.name; int output_size = text_encoder_->GetOutputInfos().size(); - std::vector text_outputs(output_size); - text_encoder_->Infer(text_inputs, &text_outputs); + std::vector outputs(output_size); + text_encoder_->Infer(inputs, &outputs); FDTensor text_embeddings; - function::Tile(text_outputs[0], {num_images_per_prompt, 1, 1}, - &text_embeddings); + function::Tile(outputs[0], {num_images_per_prompt, 1, 1}, &text_embeddings); - // here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - // of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - // corresponds to doing no classifier free guidance. + // here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + // of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + // corresponds to doing no classifier free guidance. bool do_classifier_free_guidance = guidance_scale > 1.0; if (do_classifier_free_guidance) { std::vector uncond_tokens; @@ -114,11 +117,11 @@ void StableDiffusionInpaintPipeline::Predict( auto curr_ids = encoding.GetIds(); input_ids.insert(input_ids.end(), curr_ids.begin(), curr_ids.end()); } - text_inputs[0].SetExternalData({batch_size, max_length}, FDDataType::INT64, - input_ids.data()); - text_encoder_->Infer(text_inputs, &text_outputs); + inputs[0].SetExternalData({batch_size, max_length}, FDDataType::INT64, + input_ids.data()); + text_encoder_->Infer(inputs, &outputs); FDTensor uncond_embeddings; - function::Tile(text_outputs[0], {num_images_per_prompt, 1, 1}, + function::Tile(outputs[0], {num_images_per_prompt, 1, 1}, &uncond_embeddings); function::Concat({uncond_embeddings, text_embeddings}, &text_embeddings); } @@ -131,10 +134,55 @@ void StableDiffusionInpaintPipeline::Predict( function::GaussianRandom(latents_shape, &actual_latents, latents_dtype); } else { bool result = std::equal(latents_shape.begin(), latents_shape.end(), - latents->Shape().begin()); + latents->Shape().begin()); FDASSERT(result, "Unexpected latents shape, got %s, expected %s", Str(latents_shape).c_str(), Str(latents->Shape()).c_str()); actual_latents = *latents; } + FDTensor mask_t, mask_image_t; + PrepareMaskAndMaskedImage(image, mask_image, {height / 8, width / 8}, &mask_t, + &mask_image_t); + function::Cast(mask_t, &mask_t, actual_latents.Dtype()); + function::Cast(mask_image_t, &mask_image_t, actual_latents.Dtype()); + + // Get vae encoder output + TensorInfo vae_encoder_info = vae_encoder_->GetInputInfo(0); + mask_image_t.name = vae_encoder_info.name; + outputs.resize(vae_encoder_->GetOutputInfos().size()); + inputs = {mask_image_t}; + vae_encoder_->Infer(inputs, &outputs); + FDTensor masked_image_latents = 0.18215 * outputs[0]; + + auto mask_shape = mask_t.Shape(); + mask_shape[0] = batch_size * num_images_per_prompt; + function::Tile(mask_t, mask_shape, &mask_t); + + auto mask_image_shape = mask_image_t.Shape(); + mask_image_shape[0] = batch_size * num_images_per_prompt; + function::Tile(mask_image_t, mask_image_shape, &mask_image_t); + + if (do_classifier_free_guidance) { + function::Concat({mask_t, mask_t}, &mask_t); + function::Concat({mask_image_t, mask_image_t}, &mask_image_t); + } + int num_channels_mask = mask_t.Shape()[1]; + int num_channels_masked_image = mask_image_t.Shape()[1]; + FDASSERT( + NUM_LATENT_CHANNELS + num_channels_mask + num_channels_masked_image == + NUM_UNET_INPUT_CHANNELS, + "Incorrect configuration settings! The config of `pipeline.unet` expects" + " {%d} but received `num_channels_latents`: %d + `num_channels_mask`: %d " + "+ `num_channels_masked_image`: %d" + " = %d. Please verify the config of `pipeline.unet` or your `mask_image` " + "or `image` input.", + NUM_UNET_INPUT_CHANNELS, NUM_LATENT_CHANNELS, num_channels_mask, + num_channels_masked_image, + NUM_LATENT_CHANNELS + num_channels_mask + num_channels_masked_image); + + // set timesteps + scheduler_->SetTimesteps(num_inference_steps); + + // scale the initial noise by the standard deviation required by the scheduler + actual_latents = actual_latents * scheduler_->InitNoiseSigma(); } } // namespace fastdeploy diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h index 04ad288adff..9f0fec35ec9 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h @@ -46,6 +46,9 @@ class StableDiffusionInpaintPipeline { callback_ptr callback = nullptr, int callback_steps = 1); private: + void PrepareMaskAndMaskedImage(cv::Mat* image, cv::Mat* mask_mat, + const std::vector& shape, + FDTensor* mask, FDTensor* mask_image); std::unique_ptr vae_encoder_; std::unique_ptr vae_decoder_; std::unique_ptr text_encoder_; diff --git a/examples/multimodal/stable_diffusion/cpp/scheduler.h b/examples/multimodal/stable_diffusion/cpp/scheduler.h index 6933fe3c87a..432bfeea5c1 100644 --- a/examples/multimodal/stable_diffusion/cpp/scheduler.h +++ b/examples/multimodal/stable_diffusion/cpp/scheduler.h @@ -27,6 +27,7 @@ class Scheduler { const std::vector& timesteps = {}) = 0; virtual void AddNoise(const FDTensor& original_samples, const FDTensor& noise, const FDTensor& timesteps, FDTensor* out) = 0; + virtual float InitNoiseSigma() = 0; }; } // namespace fastdeploy diff --git a/fastdeploy/function/tile.cc b/fastdeploy/function/tile.cc index 6437b4ec603..143d9d6dbef 100644 --- a/fastdeploy/function/tile.cc +++ b/fastdeploy/function/tile.cc @@ -49,6 +49,7 @@ void TileFunctor(const FDTensor& x, return; } + FDTensor out_tmp; Eigen::DSizes bcast_dims; for (size_t i = 0; i < repeat_times.size(); ++i) { bcast_dims[i] = repeat_times[i]; @@ -59,12 +60,14 @@ void TileFunctor(const FDTensor& x, out_shape[i] *= repeat_times[i]; } - out->Allocate(out_shape, x.Dtype()); + out_tmp.Allocate(out_shape, x.Dtype()); auto eigen_x = EigenTensor::From(x, x_shape); auto eigen_out = EigenTensor::From(*out, out_shape); const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); eigen_out.device(dev) = eigen_x.broadcast(bcast_dims); + + *out = std::move(out_tmp); } template From 400d6b5e89242c21cbdc0b8f237b87a956e64497 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 1 Dec 2022 07:56:40 +0000 Subject: [PATCH 06/14] Add unet infer --- .../cpp/dpm_solver_multistep_scheduler.cc | 2 + .../cpp/dpm_solver_multistep_scheduler.h | 1 + .../cpp/pipeline_stable_diffusion_inpaint.cc | 48 ++++++++++++++++++- .../stable_diffusion/cpp/scheduler.h | 1 + 4 files changed, 50 insertions(+), 2 deletions(-) diff --git a/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc b/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc index 8b96d836664..b6ac2220014 100644 --- a/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc +++ b/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc @@ -394,4 +394,6 @@ void DPMSolverMultistepScheduler::AddNoise(const FDTensor& original_samples, *out = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise; } +FDTensor DPMSolverMultistepScheduler::GetTimesteps() { return timesteps_; } + } // namespace fastdeploy diff --git a/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.h b/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.h index 6924eb80278..0775ba1ee79 100644 --- a/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.h +++ b/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.h @@ -55,6 +55,7 @@ class DPMSolverMultistepScheduler : public Scheduler { void AddNoise(const FDTensor& original_samples, const FDTensor& noise, const FDTensor& timesteps, FDTensor* out) override; float InitNoiseSigma() override; + FDTensor GetTimesteps() override; struct Config { int num_train_timesteps_; float beta_start_; diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index fc401d4a203..7813f9dbbe7 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -55,8 +55,6 @@ void StableDiffusionInpaintPipeline::Predict( "`callback_steps` has to be a positive integer but is {%d}", callback_steps); - scheduler_->SetTimesteps(num_inference_steps); - // Setting tokenizer attr if (max_length == 0) { tokenizer_.EnablePadMethod(fast_tokenizer::core::RIGHT, @@ -184,5 +182,51 @@ void StableDiffusionInpaintPipeline::Predict( // scale the initial noise by the standard deviation required by the scheduler actual_latents = actual_latents * scheduler_->InitNoiseSigma(); + + auto timestep = scheduler_->GetTimesteps(); + int64_t* timestep_data = reinterpret_cast(timestep.Data()); + for (int i = 0; i < timestep.Numel(); ++i) { + FDTensor t; + function::Slice(timestep, {0}, {i}, &t); + // expand the latents if we are doing classifier free guidance + FDTensor latent_model_input; + if (do_classifier_free_guidance) { + function::Concat({actual_latents, actual_latents}, &latent_model_input); + } else { + latent_model_input = actual_latents; + } + // concat latents, mask, masked_image_latnets in the channel dimension + function::Concat({latent_model_input, mask_t, mask_image_t}, + &latent_model_input, 1); + scheduler_->ScaleModelInput(latent_model_input, &latent_model_input, {t}); + + // predict the noise residual + FDTensor noise_pred; + auto unet_infos = unet_->GetInputInfos(); + latent_model_input.name = unet_infos[0].name; + t.name = unet_infos[1].name; + text_embeddings.name = unet_infos[2].name; + outputs.resize(unet_->GetOutputInfos().size()); + inputs = {latent_model_input, t, text_embeddings}; + unet_->Infer(inputs, &outputs); + noise_pred = std::move(outputs[0]); + // perform guidance + if (do_classifier_free_guidance) { + std::vector noise_preds; + int dim0 = noise_pred.Shape()[0]; + function::Split(noise_pred, {dim0 - dim0 / 2, dim0 / 2}, &noise_preds); + noise_pred = + noise_preds[0] + guidance_scale * (noise_preds[1] - noise_preds[0]); + } + + // compute the previous noisy sample x_t -> x_t-1 + int64_t time = reinterpret_cast(t.Data())[0]; + scheduler_->Step(noise_pred, time, actual_latents, &actual_latents); + + // call the callback, if provided + if (callback != nullptr && i % callback_steps == 0) { + callback(i, time, &actual_latents); + } + } } } // namespace fastdeploy diff --git a/examples/multimodal/stable_diffusion/cpp/scheduler.h b/examples/multimodal/stable_diffusion/cpp/scheduler.h index 432bfeea5c1..e4dc452def8 100644 --- a/examples/multimodal/stable_diffusion/cpp/scheduler.h +++ b/examples/multimodal/stable_diffusion/cpp/scheduler.h @@ -21,6 +21,7 @@ namespace fastdeploy { class Scheduler { public: virtual void SetTimesteps(int num_inference_steps) = 0; + virtual FDTensor GetTimesteps() = 0; virtual void Step(const FDTensor& model_output, int timestep, const FDTensor& sample, FDTensor* prev_sample) = 0; virtual void ScaleModelInput(const FDTensor& sample, FDTensor* out, From 90e40c7ca6a197ee5394cec4406505f1f8b3db31 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 1 Dec 2022 08:40:10 +0000 Subject: [PATCH 07/14] Add vae decoder predict --- .../cpp/pipeline_stable_diffusion_inpaint.cc | 37 +++++++++++++++++-- .../cpp/pipeline_stable_diffusion_inpaint.h | 7 ++-- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index 7813f9dbbe7..3bc871193e7 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -40,10 +40,10 @@ StableDiffusionInpaintPipeline::StableDiffusionInpaintPipeline( void StableDiffusionInpaintPipeline::Predict( const std::vector& prompts, cv::Mat* image, - cv::Mat* mask_image, FDTensor* output_image, int height, int width, - int num_inference_steps, float guidance_scale, + cv::Mat* mask_image, std::vector* output_images, int height, + int width, int num_inference_steps, float guidance_scale, const std::vector& negative_prompt, int num_images_per_prompt, - float eta, uint32_t max_length, const FDTensor* latents, + float eta, uint32_t max_length, const FDTensor* latents, bool output_cv_mat, callback_ptr callback, int callback_steps) { int batch_size = prompts.size(); FDASSERT(batch_size >= 1, "prompts should not be empty"); @@ -227,6 +227,37 @@ void StableDiffusionInpaintPipeline::Predict( if (callback != nullptr && i % callback_steps == 0) { callback(i, time, &actual_latents); } + actual_latents = (1.0f / 0.18215f) * actual_latents; + + // Get vae decoder output + int actual_latents_bs = actual_latents.Shape()[0]; + TensorInfo vae_decoder_info = vae_decoder_->GetInputInfo(0); + inputs.resize(1); + outputs.resize(vae_decoder_->GetOutputInfos().size()); + std::vector decoder_reuslt; + for (int i = 0; i < actual_latents_bs; ++i) { + function::Slice(actual_latents, {0}, {i}, {i + 1}, &inputs[0]); + inputs[0].name = vae_decoder_info.name; + vae_decoder_->Infer(inputs, &outputs); + decoder_reuslt.emplace_back(std::move(outputs[0])); + } + FDTensor output_image; + function::Concat(decoder_reuslt, &output_image); + + function::Clip(output_image / 2.0f + 0.5f, 0, 1, &output_image); + function::Transpose(output_image, &output_image, {0, 2, 3, 1}); + + if (output_cv_mat) { + output_image = output_image * 255.0f; + function::Round(output_image, &output_image); + function::Cast(output_image, &output_image, FDDataType::UINT8); + } + + int output_batch_size = output_image.Shape()[0]; + output_images->resize(output_batch_size); + for (int i = 0; i < output_batch_size; ++i) { + function::Slice(output_image, {0}, {i}, &(*output_images)[i]); + } } } } // namespace fastdeploy diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h index 9f0fec35ec9..8fbbb901370 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h @@ -37,13 +37,14 @@ class StableDiffusionInpaintPipeline { const paddlenlp::fast_tokenizer::tokenizers_impl::ClipFastTokenizer& tokenizer); void Predict(const std::vector& prompts, cv::Mat* image, - cv::Mat* mask_image, FDTensor* output_image, int height = 512, - int width = 512, int num_inference_steps = 50, + cv::Mat* mask_image, std::vector* output_images, + int height = 512, int width = 512, int num_inference_steps = 50, float guidance_scale = 7.5, const std::vector& negative_prompt = {}, int num_images_per_prompt = 1, float eta = 0.0, uint32_t max_length = 77, const FDTensor* latents = nullptr, - callback_ptr callback = nullptr, int callback_steps = 1); + bool output_cv_mat = true, callback_ptr callback = nullptr, + int callback_steps = 1); private: void PrepareMaskAndMaskedImage(cv::Mat* image, cv::Mat* mask_mat, From 0133c229ffbf689fe4a72e965d63c00c6f231530 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 1 Dec 2022 13:17:32 +0000 Subject: [PATCH 08/14] add PrepareMaskAndMaskedImage --- .../cpp/pipeline_stable_diffusion_inpaint.cc | 56 +++++++++++++++++-- .../cpp/pipeline_stable_diffusion_inpaint.h | 6 +- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index 3bc871193e7..4fca51afb17 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -14,6 +14,9 @@ #include "pipeline_stable_diffusion_inpaint.h" #include "fastdeploy/function/functions.h" +#include "fastdeploy/vision/common/processors/color_space_convert.h" +#include "fastdeploy/vision/common/processors/mat.h" +#include "fastdeploy/vision/common/processors/resize.h" #include using namespace paddlenlp; @@ -24,8 +27,53 @@ static constexpr int NUM_LATENT_CHANNELS = 4; static constexpr int NUM_UNET_INPUT_CHANNELS = 9; void StableDiffusionInpaintPipeline::PrepareMaskAndMaskedImage( - cv::Mat* image, cv::Mat* mask_mat, const std::vector& shape, - FDTensor* mask, FDTensor* mask_image) {} + const cv::Mat& image, const cv::Mat& mask_mat, + const std::vector& shape, FDTensor* mask, FDTensor* mask_image) { + vision::FDMat image_fdmat(image); + vision::BGR2RGB::Run(&image_fdmat, vision::ProcLib::OPENCV); + vision::Resize::Run(&image_fdmat, shape[1] * 8, shape[0] * 8, -1.0f, -1.0f, + cv::INTER_NEAREST, false, vision::ProcLib::OPENCV); + image_fdmat.CopyToTensor(mask_image); + + vision::FDMat mask_fdmat(mask_mat); + vision::BGR2GRAY::Run(&mask_fdmat, vision::ProcLib::OPENCV); + vision::Resize::Run(&mask_fdmat, shape[1] * 8, shape[0] * 8, -1.0f, -1.0f, + cv::INTER_NEAREST, false, vision::ProcLib::OPENCV); + FDTensor image_mask; + mask_fdmat.CopyToTensor(&image_mask); + function::Cast(image_mask, &image_mask, FDDataType::FP32); + std::vector float_mask(image_mask.Numel(), 0); + float* image_mask_ptr = reinterpret_cast(image_mask.Data()); + for (int i = 0; i < image_mask.Numel(); ++i) { + if (image_mask_ptr[i] < 127.5) { + float_mask[i] = 1; + } + } + image_mask.SetExternalData({1, 1, shape[1] * 8, shape[0] * 8}, + FDDataType::FP32, float_mask.data()); + + // Set mask_image + mask_image->ExpandDim(); + function::Transpose(*mask_image, mask_image, {0, 3, 1, 2}); + function::Cast(*mask_image, mask_image, FDDataType::FP32); + *mask_image = *mask_image / 127.5f - 1.0f; + *mask_image = *mask_image * image_mask; + + // Set mask + mask_fdmat.CopyToTensor(mask); + function::Cast(*mask, mask, FDDataType::FP32); + *mask = *mask / 255.0f; + mask->ExpandDim(); + mask->ExpandDim(); + float* mask_data = reinterpret_cast(mask->Data()); + for (int i = 0; i < mask->Numel(); ++i) { + if (mask_data[i] < 0.5) { + mask_data[i] = 0; + } else { + mask_data[i] = 1; + } + } +} StableDiffusionInpaintPipeline::StableDiffusionInpaintPipeline( std::unique_ptr vae_encoder, std::unique_ptr vae_decoder, @@ -39,8 +87,8 @@ StableDiffusionInpaintPipeline::StableDiffusionInpaintPipeline( scheduler_(std::move(scheduler)), tokenizer_(tokenizer) {} void StableDiffusionInpaintPipeline::Predict( - const std::vector& prompts, cv::Mat* image, - cv::Mat* mask_image, std::vector* output_images, int height, + const std::vector& prompts, const cv::Mat& image, + const cv::Mat& mask_image, std::vector* output_images, int height, int width, int num_inference_steps, float guidance_scale, const std::vector& negative_prompt, int num_images_per_prompt, float eta, uint32_t max_length, const FDTensor* latents, bool output_cv_mat, diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h index 8fbbb901370..063c370f354 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.h @@ -36,8 +36,8 @@ class StableDiffusionInpaintPipeline { std::unique_ptr scheduler, const paddlenlp::fast_tokenizer::tokenizers_impl::ClipFastTokenizer& tokenizer); - void Predict(const std::vector& prompts, cv::Mat* image, - cv::Mat* mask_image, std::vector* output_images, + void Predict(const std::vector& prompts, const cv::Mat& image, + const cv::Mat& mask_image, std::vector* output_images, int height = 512, int width = 512, int num_inference_steps = 50, float guidance_scale = 7.5, const std::vector& negative_prompt = {}, @@ -47,7 +47,7 @@ class StableDiffusionInpaintPipeline { int callback_steps = 1); private: - void PrepareMaskAndMaskedImage(cv::Mat* image, cv::Mat* mask_mat, + void PrepareMaskAndMaskedImage(const cv::Mat& image, const cv::Mat& mask_mat, const std::vector& shape, FDTensor* mask, FDTensor* mask_image); std::unique_ptr vae_encoder_; From 50784e3f07d2015c26e2f50e188e71cf81c704aa Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 1 Dec 2022 17:10:23 +0000 Subject: [PATCH 09/14] Add imwrite --- .../multimodal/stable_diffusion/cpp/main.cc | 121 ++++++++++++++++-- .../cpp/pipeline_stable_diffusion_inpaint.cc | 31 +++-- fastdeploy/function/tile.cc | 2 +- fastdeploy/utils/utils.h | 77 +++++------ 4 files changed, 166 insertions(+), 65 deletions(-) diff --git a/examples/multimodal/stable_diffusion/cpp/main.cc b/examples/multimodal/stable_diffusion/cpp/main.cc index 3c7d33029fd..c7701198193 100644 --- a/examples/multimodal/stable_diffusion/cpp/main.cc +++ b/examples/multimodal/stable_diffusion/cpp/main.cc @@ -13,23 +13,116 @@ // limitations under the License. #include "dpm_solver_multistep_scheduler.h" +#include "fastdeploy/vision/common/processors/mat.h" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "pipeline_stable_diffusion_inpaint.h" #include +#include +#include +#include + +template std::string Str(const T* value, int size) { + std::ostringstream oss; + oss << "[ " << value[0]; + for (int i = 1; i < size; ++i) { + oss << " ," << value[i]; + } + oss << " ]"; + return oss.str(); +} + +std::unique_ptr +CreateRuntime(const std::string& model_file, const std::string& params_file, + bool use_paddle_backend = true) { + fastdeploy::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file, + fastdeploy::ModelFormat::PADDLE); + runtime_option.UseGpu(); + if (use_paddle_backend) { + runtime_option.UsePaddleBackend(); + } else { + runtime_option.UseOrtBackend(); + } + std::unique_ptr runtime = + std::unique_ptr(new fastdeploy::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return nullptr; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + return runtime; +} int main() { - fastdeploy::DPMSolverMultistepScheduler dpm( - /* num_train_timesteps */ 1000, - /* beta_start = */ 0.00085, - /* beta_end = */ 0.012, - /* beta_schedule = */ "scaled_linear", - /* trained_betas = */ {}, - /* solver_order = */ 2, - /* predict_epsilon = */ true, - /* thresholding = */ false, - /* dynamic_thresholding_ratio = */ 0.995, - /* sample_max_value = */ 1.0, - /* algorithm_type = */ "dpmsolver++", - /* solver_type = */ "midpoint", - /* lower_order_final = */ true); + // 1. Init scheduler + std::unique_ptr dpm( + new fastdeploy::DPMSolverMultistepScheduler( + /* num_train_timesteps */ 1000, + /* beta_start = */ 0.00085, + /* beta_end = */ 0.012, + /* beta_schedule = */ "scaled_linear", + /* trained_betas = */ {}, + /* solver_order = */ 2, + /* predict_epsilon = */ true, + /* thresholding = */ false, + /* dynamic_thresholding_ratio = */ 0.995, + /* sample_max_value = */ 1.0, + /* algorithm_type = */ "dpmsolver++", + /* solver_type = */ "midpoint", + /* lower_order_final = */ true)); + + // 2. Init text encoder runtime + std::string text_model_file = "sd15_inpaint/text_encoder/inference.pdmodel"; + std::string text_params_file = + "sd15_inpaint/text_encoder/inference.pdiparams"; + std::unique_ptr text_encoder_runtime = + CreateRuntime(text_model_file, text_params_file, false); + + // 3. Init vae encoder runtime + std::string vae_encoder_model_file = + "sd15_inpaint/vae_encoder/inference.pdmodel"; + std::string vae_encoder_params_file = + "sd15_inpaint/vae_encoder/inference.pdiparams"; + std::unique_ptr vae_encoder_runtime = + CreateRuntime(vae_encoder_model_file, vae_encoder_params_file); + + // 4. Init vae decoder runtime + std::string vae_decoder_model_file = + "sd15_inpaint/vae_decoder/inference.pdmodel"; + std::string vae_decoder_params_file = + "sd15_inpaint/vae_decoder/inference.pdiparams"; + std::unique_ptr vae_decoder_runtime = + CreateRuntime(vae_decoder_model_file, vae_decoder_params_file); + + // 5. Init unet runtime + std::string unet_model_file = "sd15_inpaint/unet/inference.pdmodel"; + std::string unet_params_file = "sd15_inpaint/unet/inference.pdiparams"; + std::unique_ptr unet_runtime = + CreateRuntime(unet_model_file, unet_params_file); + + // 6. Init fast tokenizer + paddlenlp::fast_tokenizer::tokenizers_impl::ClipFastTokenizer tokenizer( + "clip/vocab.json", "clip/merges.txt", /* max_length = */ 77); + fastdeploy::StableDiffusionInpaintPipeline pipe( + std::move(vae_encoder_runtime), std::move(vae_decoder_runtime), + std::move(text_encoder_runtime), std::move(unet_runtime), + /* scheduler = */ std::move(dpm), tokenizer); + + // 7. Read images + auto image = cv::imread("overture-creations.png"); + auto mask_image = cv::imread("overture-creations-mask.png"); + // 8. Predict + std::vector prompts = { + "Face of a yellow cat, high resolution, sitting on a park bench"}; + std::vector outputs; + pipe.Predict(prompts, image, mask_image, &outputs, /* height = */ 512, + /* width = */ 512, /* num_inference_steps = */ 50); + fastdeploy::vision::FDMat mat = fastdeploy::vision::FDMat::Create(outputs[0]); + cv::imwrite("cat_on_bench_new.png", *mat.GetOpenCVMat()); return 0; } \ No newline at end of file diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index 4fca51afb17..deb69bf0a1e 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -33,14 +33,14 @@ void StableDiffusionInpaintPipeline::PrepareMaskAndMaskedImage( vision::BGR2RGB::Run(&image_fdmat, vision::ProcLib::OPENCV); vision::Resize::Run(&image_fdmat, shape[1] * 8, shape[0] * 8, -1.0f, -1.0f, cv::INTER_NEAREST, false, vision::ProcLib::OPENCV); - image_fdmat.CopyToTensor(mask_image); + image_fdmat.ShareWithTensor(mask_image); vision::FDMat mask_fdmat(mask_mat); vision::BGR2GRAY::Run(&mask_fdmat, vision::ProcLib::OPENCV); vision::Resize::Run(&mask_fdmat, shape[1] * 8, shape[0] * 8, -1.0f, -1.0f, cv::INTER_NEAREST, false, vision::ProcLib::OPENCV); FDTensor image_mask; - mask_fdmat.CopyToTensor(&image_mask); + mask_fdmat.ShareWithTensor(&image_mask); function::Cast(image_mask, &image_mask, FDDataType::FP32); std::vector float_mask(image_mask.Numel(), 0); float* image_mask_ptr = reinterpret_cast(image_mask.Data()); @@ -60,11 +60,15 @@ void StableDiffusionInpaintPipeline::PrepareMaskAndMaskedImage( *mask_image = *mask_image * image_mask; // Set mask - mask_fdmat.CopyToTensor(mask); + vision::FDMat mask_fdmat_t(mask_mat); + vision::BGR2GRAY::Run(&mask_fdmat_t, vision::ProcLib::OPENCV); + vision::Resize::Run(&mask_fdmat_t, shape[1], shape[0], -1.0f, -1.0f, + cv::INTER_NEAREST, false, vision::ProcLib::OPENCV); + mask_fdmat_t.ShareWithTensor(mask); function::Cast(*mask, mask, FDDataType::FP32); *mask = *mask / 255.0f; mask->ExpandDim(); - mask->ExpandDim(); + function::Transpose(*mask, mask, {0, 3, 1, 2}); float* mask_data = reinterpret_cast(mask->Data()); for (int i = 0; i < mask->Numel(); ++i) { if (mask_data[i] < 0.5) { @@ -96,10 +100,10 @@ void StableDiffusionInpaintPipeline::Predict( int batch_size = prompts.size(); FDASSERT(batch_size >= 1, "prompts should not be empty"); FDASSERT( - height % 8 != 0 or width % 8 != 0, + height % 8 == 0 && width % 8 == 0, "`height` and `width` have to be divisible by 8 but are {%d} and {%d}.", height, width); - FDASSERT(callback_steps <= 0, + FDASSERT(callback_steps > 0, "`callback_steps` has to be a positive integer but is {%d}", callback_steps); @@ -199,25 +203,26 @@ void StableDiffusionInpaintPipeline::Predict( vae_encoder_->Infer(inputs, &outputs); FDTensor masked_image_latents = 0.18215 * outputs[0]; - auto mask_shape = mask_t.Shape(); + std::vector mask_shape(mask_t.Shape().size(), 1); mask_shape[0] = batch_size * num_images_per_prompt; function::Tile(mask_t, mask_shape, &mask_t); - auto mask_image_shape = mask_image_t.Shape(); + std::vector mask_image_shape(masked_image_latents.Shape().size(), 1); mask_image_shape[0] = batch_size * num_images_per_prompt; - function::Tile(mask_image_t, mask_image_shape, &mask_image_t); + function::Tile(masked_image_latents, mask_image_shape, &masked_image_latents); if (do_classifier_free_guidance) { function::Concat({mask_t, mask_t}, &mask_t); - function::Concat({mask_image_t, mask_image_t}, &mask_image_t); + function::Concat({masked_image_latents, masked_image_latents}, + &masked_image_latents); } int num_channels_mask = mask_t.Shape()[1]; - int num_channels_masked_image = mask_image_t.Shape()[1]; + int num_channels_masked_image = masked_image_latents.Shape()[1]; FDASSERT( NUM_LATENT_CHANNELS + num_channels_mask + num_channels_masked_image == NUM_UNET_INPUT_CHANNELS, "Incorrect configuration settings! The config of `pipeline.unet` expects" - " {%d} but received `num_channels_latents`: %d + `num_channels_mask`: %d " + " %d but received `num_channels_latents`: %d + `num_channels_mask`: %d " "+ `num_channels_masked_image`: %d" " = %d. Please verify the config of `pipeline.unet` or your `mask_image` " "or `image` input.", @@ -244,7 +249,7 @@ void StableDiffusionInpaintPipeline::Predict( latent_model_input = actual_latents; } // concat latents, mask, masked_image_latnets in the channel dimension - function::Concat({latent_model_input, mask_t, mask_image_t}, + function::Concat({latent_model_input, mask_t, masked_image_latents}, &latent_model_input, 1); scheduler_->ScaleModelInput(latent_model_input, &latent_model_input, {t}); diff --git a/fastdeploy/function/tile.cc b/fastdeploy/function/tile.cc index 143d9d6dbef..c6e3095c6fb 100644 --- a/fastdeploy/function/tile.cc +++ b/fastdeploy/function/tile.cc @@ -62,7 +62,7 @@ void TileFunctor(const FDTensor& x, out_tmp.Allocate(out_shape, x.Dtype()); auto eigen_x = EigenTensor::From(x, x_shape); - auto eigen_out = EigenTensor::From(*out, out_shape); + auto eigen_out = EigenTensor::From(out_tmp, out_shape); const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); eigen_out.device(dev) = eigen_x.broadcast(bcast_dims); diff --git a/fastdeploy/utils/utils.h b/fastdeploy/utils/utils.h index 9b2a0fe2013..0dff28f8be0 100644 --- a/fastdeploy/utils/utils.h +++ b/fastdeploy/utils/utils.h @@ -66,7 +66,8 @@ class FASTDEPLOY_DECL FDLogger { if (!verbose_ && line_ != "") { std::cout << line_ << std::endl; #ifdef __ANDROID__ - __android_log_print(ANDROID_LOG_INFO, prefix_.c_str(), "%s", line_.c_str()); + __android_log_print(ANDROID_LOG_INFO, prefix_.c_str(), "%s", + line_.c_str()); #endif } } @@ -122,6 +123,8 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file, [&] { \ const auto& __dtype__ = TYPE; \ switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::UINT8, uint8_t, \ + __VA_ARGS__) \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::BOOL, bool, \ __VA_ARGS__) \ FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ @@ -141,26 +144,26 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file, } \ }() -#define FD_VISIT_INT_FLOAT_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::UINT8, uint8_t, \ - __VA_ARGS__) \ - default: \ - FDASSERT(false, \ - "Invalid enum data type. Expect to accept data type INT32, " \ - "INT64, FP32, FP64, UINT8 but receive type %s.", \ - Str(__dtype__).c_str()); \ - } \ +#define FD_VISIT_INT_FLOAT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto& __dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::UINT8, uint8_t, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data type INT32, " \ + "INT64, FP32, FP64, UINT8 but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ }() #define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) \ @@ -179,22 +182,22 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file, } \ }() -#define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::UINT8, uint8_t, \ - __VA_ARGS__) \ - default: \ - FDASSERT(false, \ - "Invalid enum data type. Expect to accept data type INT32, " \ - "INT64, UINT8 but receive type %s.", \ - Str(__dtype__).c_str()); \ - } \ +#define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto& __dtype__ = TYPE; \ + switch (__dtype__) { \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ + __VA_ARGS__) \ + FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::UINT8, uint8_t, \ + __VA_ARGS__) \ + default: \ + FDASSERT(false, \ + "Invalid enum data type. Expect to accept data type INT32, " \ + "INT64, UINT8 but receive type %s.", \ + Str(__dtype__).c_str()); \ + } \ }() FASTDEPLOY_DECL std::vector From 1ed6e29fa8fb329986a902779c5e484977700d7d Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 1 Dec 2022 17:23:54 +0000 Subject: [PATCH 10/14] Add time counter --- examples/multimodal/stable_diffusion/cpp/main.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/multimodal/stable_diffusion/cpp/main.cc b/examples/multimodal/stable_diffusion/cpp/main.cc index c7701198193..62bcdfd1db6 100644 --- a/examples/multimodal/stable_diffusion/cpp/main.cc +++ b/examples/multimodal/stable_diffusion/cpp/main.cc @@ -14,6 +14,7 @@ #include "dpm_solver_multistep_scheduler.h" #include "fastdeploy/vision/common/processors/mat.h" +#include "fastdeploy/utils/perf.h" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" #include "pipeline_stable_diffusion_inpaint.h" @@ -120,8 +121,12 @@ int main() { std::vector prompts = { "Face of a yellow cat, high resolution, sitting on a park bench"}; std::vector outputs; + fastdeploy::TimeCounter tc; + tc.Start(); pipe.Predict(prompts, image, mask_image, &outputs, /* height = */ 512, /* width = */ 512, /* num_inference_steps = */ 50); + tc.End(); + tc.PrintInfo(); fastdeploy::vision::FDMat mat = fastdeploy::vision::FDMat::Create(outputs[0]); cv::imwrite("cat_on_bench_new.png", *mat.GetOpenCVMat()); return 0; From c3e686f6d445b5b30e8f6ce5cfe98230eea19786 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 1 Dec 2022 17:32:40 +0000 Subject: [PATCH 11/14] Fix pipeline --- .../cpp/pipeline_stable_diffusion_inpaint.cc | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index deb69bf0a1e..49b967f5af6 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -280,37 +280,37 @@ void StableDiffusionInpaintPipeline::Predict( if (callback != nullptr && i % callback_steps == 0) { callback(i, time, &actual_latents); } - actual_latents = (1.0f / 0.18215f) * actual_latents; + } + actual_latents = (1.0f / 0.18215f) * actual_latents; - // Get vae decoder output - int actual_latents_bs = actual_latents.Shape()[0]; - TensorInfo vae_decoder_info = vae_decoder_->GetInputInfo(0); - inputs.resize(1); - outputs.resize(vae_decoder_->GetOutputInfos().size()); - std::vector decoder_reuslt; - for (int i = 0; i < actual_latents_bs; ++i) { - function::Slice(actual_latents, {0}, {i}, {i + 1}, &inputs[0]); - inputs[0].name = vae_decoder_info.name; - vae_decoder_->Infer(inputs, &outputs); - decoder_reuslt.emplace_back(std::move(outputs[0])); - } - FDTensor output_image; - function::Concat(decoder_reuslt, &output_image); + // Get vae decoder output + int actual_latents_bs = actual_latents.Shape()[0]; + TensorInfo vae_decoder_info = vae_decoder_->GetInputInfo(0); + inputs.resize(1); + outputs.resize(vae_decoder_->GetOutputInfos().size()); + std::vector decoder_reuslt; + for (int i = 0; i < actual_latents_bs; ++i) { + function::Slice(actual_latents, {0}, {i}, {i + 1}, &inputs[0]); + inputs[0].name = vae_decoder_info.name; + vae_decoder_->Infer(inputs, &outputs); + decoder_reuslt.emplace_back(std::move(outputs[0])); + } + FDTensor output_image; + function::Concat(decoder_reuslt, &output_image); - function::Clip(output_image / 2.0f + 0.5f, 0, 1, &output_image); - function::Transpose(output_image, &output_image, {0, 2, 3, 1}); + function::Clip(output_image / 2.0f + 0.5f, 0, 1, &output_image); + function::Transpose(output_image, &output_image, {0, 2, 3, 1}); - if (output_cv_mat) { - output_image = output_image * 255.0f; - function::Round(output_image, &output_image); - function::Cast(output_image, &output_image, FDDataType::UINT8); - } + if (output_cv_mat) { + output_image = output_image * 255.0f; + function::Round(output_image, &output_image); + function::Cast(output_image, &output_image, FDDataType::UINT8); + } - int output_batch_size = output_image.Shape()[0]; - output_images->resize(output_batch_size); - for (int i = 0; i < output_batch_size; ++i) { - function::Slice(output_image, {0}, {i}, &(*output_images)[i]); - } + int output_batch_size = output_image.Shape()[0]; + output_images->resize(output_batch_size); + for (int i = 0; i < output_batch_size; ++i) { + function::Slice(output_image, {0}, {i}, &(*output_images)[i]); } } } // namespace fastdeploy From e5492222c35bbb20e98ba4db2644a79d1f316343 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 1 Dec 2022 18:13:29 +0000 Subject: [PATCH 12/14] use FDTensor move --- .../cpp/pipeline_stable_diffusion_inpaint.cc | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index 49b967f5af6..76da1e815de 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -238,11 +238,15 @@ void StableDiffusionInpaintPipeline::Predict( auto timestep = scheduler_->GetTimesteps(); int64_t* timestep_data = reinterpret_cast(timestep.Data()); + outputs.resize(unet_->GetOutputInfos().size()); + inputs.resize(unet_->GetInputInfos().size()); + inputs[2] = std::move(text_embeddings); + auto unet_infos = unet_->GetInputInfos(); for (int i = 0; i < timestep.Numel(); ++i) { - FDTensor t; + FDTensor& t = inputs[1]; function::Slice(timestep, {0}, {i}, &t); // expand the latents if we are doing classifier free guidance - FDTensor latent_model_input; + FDTensor& latent_model_input = inputs[0]; if (do_classifier_free_guidance) { function::Concat({actual_latents, actual_latents}, &latent_model_input); } else { @@ -254,15 +258,11 @@ void StableDiffusionInpaintPipeline::Predict( scheduler_->ScaleModelInput(latent_model_input, &latent_model_input, {t}); // predict the noise residual - FDTensor noise_pred; - auto unet_infos = unet_->GetInputInfos(); - latent_model_input.name = unet_infos[0].name; - t.name = unet_infos[1].name; - text_embeddings.name = unet_infos[2].name; - outputs.resize(unet_->GetOutputInfos().size()); - inputs = {latent_model_input, t, text_embeddings}; + for (int i = 0; i < unet_infos.size(); ++i) { + inputs[i].name = unet_infos[i].name; + } unet_->Infer(inputs, &outputs); - noise_pred = std::move(outputs[0]); + FDTensor noise_pred = std::move(outputs[0]); // perform guidance if (do_classifier_free_guidance) { std::vector noise_preds; From 23cb00da059c09811aed104ae3c1c599ecb01413 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Fri, 2 Dec 2022 09:10:01 +0000 Subject: [PATCH 13/14] Fix scaled_linear dpm solver --- .../cpp/dpm_solver_multistep_scheduler.cc | 5 ++--- .../cpp/pipeline_stable_diffusion_inpaint.cc | 9 +++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc b/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc index b6ac2220014..b61c5b5db17 100644 --- a/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc +++ b/examples/multimodal/stable_diffusion/cpp/dpm_solver_multistep_scheduler.cc @@ -57,8 +57,8 @@ DPMSolverMultistepScheduler::DPMSolverMultistepScheduler( function::Linspace(beta_start, beta_end, num_train_timesteps, &betas_, FDDataType::FP32); } else if (beta_schedule == "scaled_linear") { - function::Linspace(beta_start, beta_end, num_train_timesteps, &betas_, - FDDataType::FP32); + function::Linspace(std::sqrt(beta_start), std::sqrt(beta_end), + num_train_timesteps, &betas_, FDDataType::FP32); betas_ = betas_ * betas_; } else if (beta_schedule == "squaredcos_cap_v2") { BetaForAlphaBar(&betas_, num_train_timesteps); @@ -316,7 +316,6 @@ void DPMSolverMultistepScheduler::Step(const FDTensor& model_output, if (timesteps_iter - timesteps_data < timesteps_.Numel()) { step_index = timesteps_iter - timesteps_data; } - int64_t prev_timestep = 0; if (step_index != timesteps_.Numel() - 1) { prev_timestep = timesteps_data[step_index + 1]; diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index 76da1e815de..6699fca1488 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -201,7 +201,7 @@ void StableDiffusionInpaintPipeline::Predict( outputs.resize(vae_encoder_->GetOutputInfos().size()); inputs = {mask_image_t}; vae_encoder_->Infer(inputs, &outputs); - FDTensor masked_image_latents = 0.18215 * outputs[0]; + FDTensor masked_image_latents = 0.18215f * outputs[0]; std::vector mask_shape(mask_t.Shape().size(), 1); mask_shape[0] = batch_size * num_images_per_prompt; @@ -243,10 +243,11 @@ void StableDiffusionInpaintPipeline::Predict( inputs[2] = std::move(text_embeddings); auto unet_infos = unet_->GetInputInfos(); for (int i = 0; i < timestep.Numel(); ++i) { - FDTensor& t = inputs[1]; + FDTensor t; function::Slice(timestep, {0}, {i}, &t); + inputs[1] = t; // expand the latents if we are doing classifier free guidance - FDTensor& latent_model_input = inputs[0]; + FDTensor latent_model_input; if (do_classifier_free_guidance) { function::Concat({actual_latents, actual_latents}, &latent_model_input); } else { @@ -256,7 +257,7 @@ void StableDiffusionInpaintPipeline::Predict( function::Concat({latent_model_input, mask_t, masked_image_latents}, &latent_model_input, 1); scheduler_->ScaleModelInput(latent_model_input, &latent_model_input, {t}); - + inputs[0] = std::move(latent_model_input); // predict the noise residual for (int i = 0; i < unet_infos.size(); ++i) { inputs[i].name = unet_infos[i].name; From cb65b3b57c5b03765617fc0e9218626b5c917db3 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Fri, 2 Dec 2022 10:09:19 +0000 Subject: [PATCH 14/14] Add RGB2BGR --- .../cpp/pipeline_stable_diffusion_inpaint.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc index 6699fca1488..cbe352a710e 100644 --- a/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc +++ b/examples/multimodal/stable_diffusion/cpp/pipeline_stable_diffusion_inpaint.cc @@ -307,11 +307,16 @@ void StableDiffusionInpaintPipeline::Predict( function::Round(output_image, &output_image); function::Cast(output_image, &output_image, FDDataType::UINT8); } - int output_batch_size = output_image.Shape()[0]; output_images->resize(output_batch_size); for (int i = 0; i < output_batch_size; ++i) { function::Slice(output_image, {0}, {i}, &(*output_images)[i]); + vision::FDMat mask_fdmat_t = vision::FDMat::Create((*output_images)[i]); + vision::RGB2BGR::Run(&mask_fdmat_t, vision::ProcLib::OPENCV); + mask_fdmat_t.CopyToTensor(&(*output_images)[i]); + FDTensor sum; + function::Sum((*output_images)[i], &sum, {}, false, true); + FDINFO << "sum = " << ((float*)sum.Data())[0] << std::endl; } } } // namespace fastdeploy