diff --git a/docs/source/about/advanced_usage.rst b/docs/source/about/advanced_usage.rst index cf4a348c184..11fe61c87a2 100644 --- a/docs/source/about/advanced_usage.rst +++ b/docs/source/about/advanced_usage.rst @@ -414,6 +414,41 @@ dwmflush dwmflush = enabled +unpaced +^^^^^^^^ + +**Description** + Don't try to match client frame rate and stream every frame the host produces as soon as possible. + Can significantly improve frame time stability. + + .. Caution:: Applies to Windows only. Experimental option. + +**Default** + ``disabled`` + +**Example** + .. code-block:: text + + unpaced = disabled + +serial +^^^^^^^^ + +**Description** + Don't use parallel devices for capture and encode. + Improves frame time stability and lowers overall streaming performance hit. + Lowers max theoretical throughput, use only if your gpu hardware encoder has enough headroom (most do). + + .. Caution:: Applies to Windows only. Experimental option. + +**Default** + ``disabled`` + +**Example** + .. code-block:: text + + serial = disabled + Audio ----- diff --git a/src/config.cpp b/src/config.cpp index 1e4276dfe30..0237c412c41 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -371,7 +371,9 @@ namespace config { {}, // encoder {}, // adapter_name {}, // output_name - true // dwmflush + true, // dwmflush + false, // unpaced + false, // serial }; audio_t audio {}; @@ -957,6 +959,8 @@ namespace config { string_f(vars, "adapter_name", video.adapter_name); string_f(vars, "output_name", video.output_name); bool_f(vars, "dwmflush", video.dwmflush); + bool_f(vars, "unpaced", video.unpaced); + bool_f(vars, "serial", video.serial); path_f(vars, "pkey", nvhttp.pkey); path_f(vars, "cert", nvhttp.cert); diff --git a/src/config.h b/src/config.h index fc77748bdde..87a4e716866 100644 --- a/src/config.h +++ b/src/config.h @@ -57,6 +57,8 @@ namespace config { std::string adapter_name; std::string output_name; bool dwmflush; + bool unpaced; + bool serial; }; struct audio_t { diff --git 
a/src/platform/windows/display.h b/src/platform/windows/display.h index 66b23d74d15..754031f042e 100644 --- a/src/platform/windows/display.h +++ b/src/platform/windows/display.h @@ -163,6 +163,8 @@ namespace platf::dxgi { return (capture_format == DXGI_FORMAT_R16G16B16A16_FLOAT) ? 8 : 4; } + bool mouse_pointer_visible = false; + const char * dxgi_format_to_string(DXGI_FORMAT format); const char * diff --git a/src/platform/windows/display_base.cpp b/src/platform/windows/display_base.cpp index 02b3fca136c..5bc0f00ee1e 100644 --- a/src/platform/windows/display_base.cpp +++ b/src/platform/windows/display_base.cpp @@ -32,7 +32,7 @@ namespace platf::dxgi { return capture_status; } - if (use_dwmflush) { + if (!config::video.unpaced && use_dwmflush) { DwmFlush(); } @@ -70,19 +70,20 @@ namespace platf::dxgi { } auto status = dup->ReleaseFrame(); + has_frame = false; switch (status) { case S_OK: - has_frame = false; return capture_e::ok; - case DXGI_ERROR_WAIT_TIMEOUT: - return capture_e::timeout; - case WAIT_ABANDONED: + + case DXGI_ERROR_INVALID_CALL: + BOOST_LOG(warning) << "Duplication frame already released"; + return capture_e::ok; + case DXGI_ERROR_ACCESS_LOST: - case DXGI_ERROR_ACCESS_DENIED: - has_frame = false; return capture_e::reinit; + default: - BOOST_LOG(error) << "Couldn't release frame [0x"sv << util::hex(status).to_string_view(); + BOOST_LOG(error) << "Error while releasing duplication frame [0x"sv << util::hex(status).to_string_view(); return capture_e::error; } } @@ -110,6 +111,13 @@ namespace platf::dxgi { CloseHandle(timer); }); + if (config::video.unpaced) { + BOOST_LOG(info) << "Using experimental unpaced path"; + } + if (config::video.serial) { + BOOST_LOG(info) << "Using experimental serial path"; + } + while (true) { // This will return false if the HDR state changes or for any number of other // display or GPU changes. 
We should reinit to examine the updated state of @@ -118,25 +126,36 @@ namespace platf::dxgi { return platf::capture_e::reinit; } - // If the wait time is between 1 us and 1 second, wait the specified time - // and offset the next frame time from the exact current frame time target. - auto wait_time_us = std::chrono::duration_cast(next_frame - std::chrono::steady_clock::now()).count(); - if (wait_time_us > 0 && wait_time_us < 1000000) { - LARGE_INTEGER due_time { .QuadPart = -10LL * wait_time_us }; - SetWaitableTimer(timer, &due_time, 0, nullptr, nullptr, false); - WaitForSingleObject(timer, INFINITE); - next_frame += delay; - } - else { - // If the wait time is negative (meaning the frame is past due) or the - // computed wait time is beyond a second (meaning possible clock issues), - // just capture the frame now and resynchronize the frame interval with - // the current time. - next_frame = std::chrono::steady_clock::now() + delay; + if (!config::video.unpaced) { + // If the wait time is between 1 microsecond and 1 second, wait the specified time + // and offset the next frame time from the exact current frame time target. + auto wait_time_us = std::chrono::duration_cast(next_frame - std::chrono::steady_clock::now()).count(); + if (wait_time_us > 0 && wait_time_us < 1000000) { + LARGE_INTEGER due_time { .QuadPart = -10LL * wait_time_us }; + SetWaitableTimer(timer, &due_time, 0, nullptr, nullptr, false); + WaitForSingleObject(timer, INFINITE); + next_frame += delay; + } + else { + // If the wait time is negative (meaning the frame is past due) or the + // computed wait time is beyond a second (meaning possible clock issues), + // just capture the frame now and resynchronize the frame interval with + // the current time. 
+ next_frame = std::chrono::steady_clock::now() + delay; + } } std::shared_ptr img_out; - auto status = snapshot(pull_free_image_cb, img_out, 1000ms, *cursor); + + std::chrono::milliseconds timeout = 1000ms; + if (config::video.serial) { + timeout = 100ms; + if (config::video.unpaced && mouse_pointer_visible) { + timeout = 0ms; + } + } + + auto status = snapshot(pull_free_image_cb, img_out, timeout, *cursor); switch (status) { case platf::capture_e::reinit: case platf::capture_e::error: @@ -156,6 +175,18 @@ namespace platf::dxgi { BOOST_LOG(error) << "Unrecognized capture status ["sv << (int) status << ']'; return status; } + + if (config::video.unpaced && mouse_pointer_visible) { + // Release desktop duplication frame so mouse pointer updates won't be delayed. + auto status = dup.release_frame(); + if (status != platf::capture_e::ok) { + return status; + } + // Limit and pace mouse pointer updates. + if (output) { + output->WaitForVBlank(); + } + } } return capture_e::ok; diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index 93cef64e9aa..89d0c8b9dc7 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -11,6 +11,7 @@ extern "C" { } #include "display.h" +#include "src/config.h" #include "src/main.h" #include "src/video.h" @@ -105,6 +106,7 @@ namespace platf::dxgi { texture2d_t capture_texture; render_target_t capture_rt; keyed_mutex_t capture_mutex; + shader_res_t capture_srv; // This is the shared handle used by hwdevice_t to open capture_texture HANDLE encoder_texture_handle = {}; @@ -360,28 +362,44 @@ namespace platf::dxgi { public: int convert(platf::img_t &img_base) override { - // Garbage collect mapped capture images whose weak references have expired - for (auto it = img_ctx_map.begin(); it != img_ctx_map.end();) { - if (it->second.img_weak.expired()) { - it = img_ctx_map.erase(it); + auto &img = (img_d3d_t &) img_base; + + auto get_locked_img_srv = [&]() -> std::tuple { 
+ if (config::video.serial) { + return { img.capture_srv.get(), nullptr }; } else { - it++; - } - } + // Garbage collect mapped capture images whose weak references have expired + for (auto it = img_ctx_map.begin(); it != img_ctx_map.end();) { + if (it->second.img_weak.expired()) { + it = img_ctx_map.erase(it); + } + else { + it++; + } + } - auto &img = (img_d3d_t &) img_base; - auto &img_ctx = img_ctx_map[img.id]; + auto &img_ctx = img_ctx_map[img.id]; - // Open the shared capture texture with our ID3D11Device - if (initialize_image_context(img, img_ctx)) { - return -1; - } + // Open the shared capture texture with our ID3D11Device + if (initialize_image_context(img, img_ctx)) { + return { nullptr, nullptr }; + } + + // Acquire encoder mutex to synchronize with capture code + auto lock_helper = texture_lock_helper(img_ctx.encoder_mutex.get()); + if (!lock_helper.lock()) { + BOOST_LOG(error) << "Failed to acquire encoder texture mutex"; + return { nullptr, nullptr }; + } + + return { img_ctx.encoder_input_res.get(), std::move(lock_helper) }; + } + }; + + auto [img_srv, optional_lock] = get_locked_img_srv(); - // Acquire encoder mutex to synchronize with capture code - auto status = img_ctx.encoder_mutex->AcquireSync(0, INFINITE); - if (status != S_OK) { - BOOST_LOG(error) << "Failed to acquire encoder mutex [0x"sv << util::hex(status).to_string_view() << ']'; + if (!img_srv) { return -1; } @@ -389,7 +407,7 @@ namespace platf::dxgi { device_ctx->VSSetShader(scene_vs.get(), nullptr, 0); device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? 
convert_Y_fp16_ps.get() : convert_Y_ps.get(), nullptr, 0); device_ctx->RSSetViewports(1, &outY_view); - device_ctx->PSSetShaderResources(0, 1, &img_ctx.encoder_input_res); + device_ctx->PSSetShaderResources(0, 1, &img_srv); device_ctx->Draw(3, 0); device_ctx->OMSetRenderTargets(1, &nv12_UV_rt, nullptr); @@ -398,9 +416,6 @@ namespace platf::dxgi { device_ctx->RSSetViewports(1, &outUV_view); device_ctx->Draw(3, 0); - // Release encoder mutex to allow capture code to reuse this image - img_ctx.encoder_mutex->ReleaseSync(0); - ID3D11ShaderResourceView *emptyShaderResourceView = nullptr; device_ctx->PSSetShaderResources(0, 1, &emptyShaderResourceView); @@ -569,33 +584,42 @@ namespace platf::dxgi { } int - init( - std::shared_ptr display, adapter_t::pointer adapter_p, - pix_fmt_e pix_fmt) { - D3D_FEATURE_LEVEL featureLevels[] { - D3D_FEATURE_LEVEL_11_1, - D3D_FEATURE_LEVEL_11_0, - D3D_FEATURE_LEVEL_10_1, - D3D_FEATURE_LEVEL_10_0, - D3D_FEATURE_LEVEL_9_3, - D3D_FEATURE_LEVEL_9_2, - D3D_FEATURE_LEVEL_9_1 - }; + init(std::shared_ptr display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) { + HRESULT status; - HRESULT status = D3D11CreateDevice( - adapter_p, - D3D_DRIVER_TYPE_UNKNOWN, - nullptr, - D3D11_CREATE_DEVICE_FLAGS, - featureLevels, sizeof(featureLevels) / sizeof(D3D_FEATURE_LEVEL), - D3D11_SDK_VERSION, - &device, - nullptr, - &device_ctx); + if (config::video.serial) { + auto display_vram = (display_vram_t *) display.get(); + device.reset(display_vram->device.get()); + device->AddRef(); + device_ctx.reset(display_vram->device_ctx.get()); + device_ctx->AddRef(); + } + else { + D3D_FEATURE_LEVEL featureLevels[] { + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + D3D_FEATURE_LEVEL_9_3, + D3D_FEATURE_LEVEL_9_2, + D3D_FEATURE_LEVEL_9_1 + }; + + status = D3D11CreateDevice( + adapter_p, + D3D_DRIVER_TYPE_UNKNOWN, + nullptr, + D3D11_CREATE_DEVICE_FLAGS, + featureLevels, sizeof(featureLevels) / 
sizeof(D3D_FEATURE_LEVEL), + D3D11_SDK_VERSION, + &device, + nullptr, + &device_ctx); - if (FAILED(status)) { - BOOST_LOG(error) << "Failed to create encoder D3D11 device [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; + if (FAILED(status)) { + BOOST_LOG(error) << "Failed to create encoder D3D11 device [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } } dxgi::dxgi_t dxgi; @@ -921,6 +945,8 @@ namespace platf::dxgi { cursor_xor.set_pos(frame_info.PointerPosition.Position.x, frame_info.PointerPosition.Position.y, frame_info.PointerPosition.Visible); } + mouse_pointer_visible = (cursor_alpha.visible || cursor_xor.visible); + const bool blend_mouse_cursor_flag = (cursor_alpha.visible || cursor_xor.visible) && cursor_visible; texture2d_t src {}; @@ -1062,6 +1088,11 @@ namespace platf::dxgi { // also creates synchonization primitives for shared access from multiple direct3d devices. if (complete_img(d3d_img.get(), dummy)) return { nullptr, nullptr }; + // Don't need to lock the image because we're using a single direct3d device + if (config::video.serial) { + return { std::move(d3d_img), nullptr }; + } + // This image is shared between capture direct3d device and encoders direct3d devices, // we must acquire lock before doing anything to it. 
texture_lock_helper lock_helper(d3d_img->capture_mutex.get()); @@ -1088,7 +1119,7 @@ namespace platf::dxgi { std::shared_ptr img; if (!pull_free_image_cb(img)) return capture_e::interrupted; - auto [d3d_img, lock] = get_locked_d3d_img(img); + auto [d3d_img, optional_lock] = get_locked_d3d_img(img); if (!d3d_img) return capture_e::error; device_ctx->CopyResource(d3d_img->capture_texture.get(), p_surface->get()); @@ -1107,7 +1138,7 @@ namespace platf::dxgi { BOOST_LOG(error) << "Logical error at " << __FILE__ << ":" << __LINE__; return capture_e::error; } - auto [d3d_img, lock] = get_locked_d3d_img(*p_img); + auto [d3d_img, optional_lock] = get_locked_d3d_img(*p_img); if (!d3d_img) return capture_e::error; p_img = nullptr; @@ -1125,7 +1156,7 @@ namespace platf::dxgi { std::shared_ptr img; if (!pull_free_image_cb(img)) return capture_e::interrupted; - auto [d3d_img, lock] = get_locked_d3d_img(img); + auto [d3d_img, optional_lock] = get_locked_d3d_img(img); if (!d3d_img) return capture_e::error; device_ctx->CopyResource(d3d_img->capture_texture.get(), src.get()); @@ -1201,7 +1232,7 @@ namespace platf::dxgi { if (!pull_free_image_cb(img_out)) return capture_e::interrupted; - auto [d3d_img, lock] = get_locked_d3d_img(img_out); + auto [d3d_img, optional_lock] = get_locked_d3d_img(img_out); if (!d3d_img) return capture_e::error; device_ctx->CopyResource(d3d_img->capture_texture.get(), p_surface->get()); @@ -1217,7 +1248,7 @@ namespace platf::dxgi { auto old_d3d_img = (img_d3d_t *) img_out.get(); bool reclear_dummy = old_d3d_img->dummy && old_d3d_img->capture_texture; - auto [d3d_img, lock] = get_locked_d3d_img(img_out, true); + auto [d3d_img, optional_lock] = get_locked_d3d_img(img_out, true); if (!d3d_img) return capture_e::error; if (reclear_dummy) { @@ -1365,7 +1396,7 @@ namespace platf::dxgi { t.Usage = D3D11_USAGE_DEFAULT; t.Format = img->format; t.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; - t.MiscFlags = D3D11_RESOURCE_MISC_SHARED_NTHANDLE | 
D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX; + t.MiscFlags = config::video.serial ? 0 : (D3D11_RESOURCE_MISC_SHARED_NTHANDLE | D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX); HRESULT status; if (dummy) { @@ -1392,25 +1423,35 @@ namespace platf::dxgi { return -1; } - // Get the keyed mutex to synchronize with the encoding code - status = img->capture_texture->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **) &img->capture_mutex); - if (FAILED(status)) { - BOOST_LOG(error) << "Failed to query IDXGIKeyedMutex [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; + if (config::video.serial) { + // Create the SRV for the capture texture + status = device->CreateShaderResourceView(img->capture_texture.get(), nullptr, &img->capture_srv); + if (FAILED(status)) { + BOOST_LOG(error) << "Failed to create shader resource view for capture [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } } + else { + // Get the keyed mutex to synchronize with the encoding code + status = img->capture_texture->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **) &img->capture_mutex); + if (FAILED(status)) { + BOOST_LOG(error) << "Failed to query IDXGIKeyedMutex [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } - resource1_t resource; - status = img->capture_texture->QueryInterface(__uuidof(IDXGIResource1), (void **) &resource); - if (FAILED(status)) { - BOOST_LOG(error) << "Failed to query IDXGIResource1 [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } + resource1_t resource; + status = img->capture_texture->QueryInterface(__uuidof(IDXGIResource1), (void **) &resource); + if (FAILED(status)) { + BOOST_LOG(error) << "Failed to query IDXGIResource1 [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } - // Create a handle for the encoder device to use to open this texture - status = resource->CreateSharedHandle(nullptr, DXGI_SHARED_RESOURCE_READ, nullptr, &img->encoder_texture_handle); - if (FAILED(status)) { - 
BOOST_LOG(error) << "Failed to create shared texture handle [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; + // Create a handle for the encoder device to use to open this texture + status = resource->CreateSharedHandle(nullptr, DXGI_SHARED_RESOURCE_READ, nullptr, &img->encoder_texture_handle); + if (FAILED(status)) { + BOOST_LOG(error) << "Failed to create shared texture handle [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } } img->data = (std::uint8_t *) img->capture_texture.get(); diff --git a/src/video.cpp b/src/video.cpp index 41ddab7e11a..c246ea7f506 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -1765,7 +1765,11 @@ namespace video { auto idr_events = mail->event(mail::idr); idr_events->raise(true); +#ifdef _WIN32 + if ((chosen_encoder->flags & PARALLEL_ENCODING) && !config::video.serial) { +#else if (chosen_encoder->flags & PARALLEL_ENCODING) { +#endif capture_async(std::move(mail), config, channel_data); } else { diff --git a/src_assets/common/assets/web/config.html b/src_assets/common/assets/web/config.html index 3572dd70f84..45318139e42 100644 --- a/src_assets/common/assets/web/config.html +++ b/src_assets/common/assets/web/config.html @@ -550,6 +550,31 @@

Configuration

Disable if you encounter any VSync-related issues. + +
+ + +
+ Don't try to match the client frame rate; instead, stream every frame the host produces as soon as possible.
+ Can significantly improve frame time stability. +
+
+ +
+ + +
+ Don't use parallel devices for capture and encode.
+ Improves frame time stability and reduces the overall performance impact of streaming.
+ Lowers the maximum theoretical throughput; use only if your GPU's hardware encoder has enough headroom (most do). +
+
Configuration "capture": "", "controller": "enabled", "dwmflush": "enabled", + "unpaced": "disabled", + "serial": "disabled", "encoder": "", "fps": "[10,30,60,90,120]", "gamepad": "x360",