diff --git a/src/platform/windows/display.h b/src/platform/windows/display.h index e4d43b336ef..247138b2157 100644 --- a/src/platform/windows/display.h +++ b/src/platform/windows/display.h @@ -36,7 +36,7 @@ using dup_t = util::safe_ptr>; using texture1d_t = util::safe_ptr>; using resource_t = util::safe_ptr>; -using multithread_t = util::safe_ptr>; +using multithread_t = util::safe_ptr>; using vs_t = util::safe_ptr>; using ps_t = util::safe_ptr>; using blend_t = util::safe_ptr>; @@ -176,6 +176,8 @@ class display_vram_t : public display_base_t, public std::enable_shared_from_thi std::shared_ptr make_hwdevice(pix_fmt_e pix_fmt) override; + multithread_t multithread; + sampler_state_t sampler_linear; blend_t blend_enable; diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index ae84bfb0efd..a3c7bd828b0 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -237,6 +237,8 @@ class hwdevice_t : public platf::hwdevice_t { int convert(platf::img_t &img_base) override { auto &img = (img_d3d_t &)img_base; + multithread->Enter(); + device_ctx_p->IASetInputLayout(input_layout.get()); _init_view_port(this->img.width, this->img.height); @@ -266,6 +268,8 @@ class hwdevice_t : public platf::hwdevice_t { device_ctx_p->Draw(3, 0); device_ctx_p->Flush(); + multithread->Leave(); + return 0; } @@ -288,14 +292,17 @@ class hwdevice_t : public platf::hwdevice_t { ++color_p; } - auto color_matrix = make_buffer((device_t::pointer)data, *color_p); + auto color_matrix = make_buffer(device_p, *color_p); if(!color_matrix) { BOOST_LOG(warning) << "Failed to create color matrix"sv; return; } + multithread->Enter(); device_ctx_p->VSSetConstantBuffers(0, 1, &info_scene); device_ctx_p->PSSetConstantBuffers(0, 1, &color_matrix); + multithread->Leave(); + this->color_matrix = std::move(color_matrix); } @@ -303,8 +310,6 @@ class hwdevice_t : public platf::hwdevice_t { this->hwframe.reset(frame); this->frame = frame; - auto device_p = (device_t::pointer)data; - auto out_width = frame->width; auto out_height = frame->height; @@ -399,10 +404,20 @@ class hwdevice_t : public platf::hwdevice_t { HRESULT status; device_p->AddRef(); - data = device_p; + this->device_p = device_p; this->device_ctx_p = device_ctx_p; + status = device_p->QueryInterface(IID_ID3D10Multithread, (void **)&multithread); + if(FAILED(status)) { + BOOST_LOG(error) << "Couldn't query ID3D10Multithread [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + + data_ptrs[0] = device_p; + data_ptrs[1] = multithread.get(); + this->data = data_ptrs; + format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010); status = device_p->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs); if(status) { @@ -467,8 +482,8 @@ class hwdevice_t : public platf::hwdevice_t { } ~hwdevice_t() override { - if(data) { - ((ID3D11Device *)data)->Release(); + if(device_p) { + device_p->Release(); } } @@ -518,7 +533,11 @@ class hwdevice_t : public platf::hwdevice_t { DXGI_FORMAT format; + device_t::pointer device_p; device_ctx_t::pointer device_ctx_p; + multithread_t multithread; + + void *data_ptrs[2] {}; }; capture_e display_vram_t::capture(snapshot_cb_t &&snapshot_cb, std::shared_ptr<::platf::img_t> img, bool *cursor) { @@ -685,6 +704,7 @@ capture_e display_vram_t::snapshot(platf::img_t *img_base, std::chrono::millisec 0.0f, 1.0f }; + multithread->Enter(); device_ctx->VSSetShader(scene_vs.get(), nullptr, 0); device_ctx->PSSetShader(scene_ps.get(), nullptr, 0); device_ctx->RSSetViewports(1, &view); @@ -694,6 +714,7 @@ capture_e display_vram_t::snapshot(platf::img_t *img_base, std::chrono::millisec device_ctx->RSSetViewports(1, &cursor.cursor_view); device_ctx->Draw(3, 0); device_ctx->OMSetBlendState(blend_disable.get(), nullptr, 0xFFFFFFFFu); + multithread->Leave(); } return capture_e::ok; @@ -704,6 +725,15 @@ int display_vram_t::init(int framerate, const std::string &display_name) { return -1; } + auto status = device->QueryInterface(IID_ID3D10Multithread, (void **)&multithread); + if(FAILED(status)) { + BOOST_LOG(error) << "Couldn't query ID3D10Multithread [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + + // Enable multi-thread protection on our device for PARALLEL_ENCODING + multithread->SetMultithreadProtected(true); + D3D11_SAMPLER_DESC sampler_desc {}; sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; @@ -713,7 +743,7 @@ int display_vram_t::init(int framerate, const std::string &display_name) { sampler_desc.MinLOD = 0; sampler_desc.MaxLOD = D3D11_FLOAT32_MAX; - auto status = device->CreateSamplerState(&sampler_desc, &sampler_linear); + status = device->CreateSamplerState(&sampler_desc, &sampler_linear); if(FAILED(status)) { BOOST_LOG(error) << "Failed to create point sampler state [0x"sv << util::hex(status).to_string_view() << ']'; return -1; diff --git a/src/video.cpp b/src/video.cpp index 5d353a385e5..e2c3b2cb845 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -442,7 +442,7 @@ static encoder_t nvenc { "h264_nvenc"s, }, #ifdef _WIN32 - DEFAULT, + PARALLEL_ENCODING, dxgi_make_hwdevice_ctx #else PARALLEL_ENCODING, @@ -486,7 +486,7 @@ static encoder_t amdvce { std::make_optional({ "qp_p"s, &config::video.qp }), "h264_amf"s, }, - DEFAULT, + PARALLEL_ENCODING, dxgi_make_hwdevice_ctx }; #endif @@ -1334,7 +1334,7 @@ void captureThreadSync() { ctx.shutdown_event->raise(true); ctx.join_event->raise(true); } - }); + }); while(encode_run_sync(synced_session_ctxs, ctx) == encode_e::reinit) {} } @@ -1350,7 +1350,7 @@ void capture_async( auto lg = util::fail_guard([&]() { images->stop(); shutdown_event->raise(true); - }); + }); auto ref = capture_thread_async.ref(); if(!ref) { @@ -1756,7 +1756,15 @@ util::Either cuda_make_hwdevice_ctx(platf::hwdevice_t *base) { #ifdef _WIN32 } -void do_nothing(void *) {} +void enter_multithread(void *ctx) { + auto mt = (ID3D10Multithread *)ctx; + mt->Enter(); +} + +void leave_multithread(void *ctx) { + auto mt = (ID3D10Multithread *)ctx; + mt->Leave(); +} namespace video { util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { @@ -1765,14 +1773,14 @@ util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_c std::fill_n((std::uint8_t *)ctx, sizeof(AVD3D11VADeviceContext), 0); - auto device = (ID3D11Device *)hwdevice_ctx->data; - - device->AddRef(); - ctx->device = device; + // display_vram_t sets data to [ID3D11Device*, ID3D10Multithread*] + auto data = (void **)hwdevice_ctx->data; + ctx->device = (ID3D11Device *)data[0]; + ctx->lock_ctx = (ID3D10Multithread *)data[1]; + ctx->lock = enter_multithread; + ctx->unlock = leave_multithread; - ctx->lock_ctx = (void *)1; - ctx->lock = do_nothing; - ctx->unlock = do_nothing; + ctx->device->AddRef(); auto err = av_hwdevice_ctx_init(ctx_buf.get()); if(err) {