Force capture callbacks with short timeouts for selected encoders.

  * src/main.cpp, src/main.h: add the global flag `force_callback` (default false).
  * src/video.cpp: add FORCE_CALLBACK (0x10) to flag_e; use it for nvenc (under _WIN32)
    and amdvce in place of DEFAULT; capture() copies the front encoder's FORCE_CALLBACK
    bit into `force_callback`. hwframe_ctx() gains `bool amf_lowlatency`, passed as
    (encoder.name == "amdvce"sv); when true it sets initial_pool_size to 3.
  * src/platform/windows/display_vram.cpp: when `force_callback` is set, snapshot() is
    given a timeout of 2 * delay instead of 1000ms, and a timeout result continues into
    the `ok` case (snapshot_cb is invoked) instead of sleeping 1ms and retrying. Also
    adds the missing newline at end of file.

  NOTE(review): context lines below are reproduced as received; several look as if
  their <...> template arguments were lost in transit -- verify against the tree
  before applying.

diff --git a/src/main.cpp b/src/main.cpp
index 58271f83d02..cf2d71dad41 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -45,6 +45,7 @@ bl::sources::severity_logger error(4); // Recoverable errors
 bl::sources::severity_logger fatal(5); // Unrecoverable errors
 
 bool display_cursor = true;
+bool force_callback = false; // assigned in video.cpp capture() from the front encoder's FORCE_CALLBACK flag
 
 using text_sink = bl::sinks::asynchronous_sink;
 boost::shared_ptr sink;
diff --git a/src/main.h b/src/main.h
index 89c4dbdc6f4..4267a2586a8 100644
--- a/src/main.h
+++ b/src/main.h
@@ -12,6 +12,7 @@
 
 extern util::ThreadPool task_pool;
 extern bool display_cursor;
+extern bool force_callback;
 
 extern boost::log::sources::severity_logger verbose;
 extern boost::log::sources::severity_logger debug;
diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp
index 3cf7c97789b..29b2117007a 100644
--- a/src/platform/windows/display_vram.cpp
+++ b/src/platform/windows/display_vram.cpp
@@ -573,14 +573,17 @@ capture_e display_vram_t::capture(snapshot_cb_t &&snapshot_cb, std::shared_ptr<:
     }
     next_frame = now + delay;
 
-    auto status = snapshot(img.get(), 1000ms, *cursor);
+    auto status = snapshot(img.get(), force_callback ? std::chrono::duration_cast<std::chrono::milliseconds>(delay * 2) : 1000ms, *cursor);
     switch(status) {
     case platf::capture_e::reinit:
     case platf::capture_e::error:
       return status;
     case platf::capture_e::timeout:
-      std::this_thread::sleep_for(1ms);
-      continue;
+      if(!force_callback) {
+        std::this_thread::sleep_for(1ms);
+        continue;
+      }
+      [[fallthrough]]; // force_callback: still invoke snapshot_cb after a timeout
     case platf::capture_e::ok:
       img = snapshot_cb(img);
       break;
@@ -885,4 +888,4 @@ int init() {
   return 0;
 }
 
-} // namespace platf::dxgi
\ No newline at end of file
+} // namespace platf::dxgi
diff --git a/src/video.cpp b/src/video.cpp
index bec77c81b9d..4417bb04108 100644
--- a/src/video.cpp
+++ b/src/video.cpp
@@ -71,7 +71,7 @@ util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_c
 util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx);
 util::Either cuda_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx);
 
-int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format);
+int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format, bool amf_lowlatency);
 
 class swdevice_t : public platf::hwdevice_t {
 public:
@@ -240,6 +240,7 @@ enum flag_e {
   H264_ONLY = 0x02, // When HEVC is too heavy
   LIMITED_GOP_SIZE = 0x04, // Some encoders don't like it when you have an infinite GOP_SIZE. *cough* VAAPI *cough*
   SINGLE_SLICE_ONLY = 0x08, // Never use multiple slices <-- Older intel iGPU's ruin it for everyone else :P
+  FORCE_CALLBACK = 0x10, // Force callbacks with short timeouts for encoders that don't perform well with callback-based capture
 };
 
 struct encoder_t {
@@ -438,7 +439,7 @@ static encoder_t nvenc {
     "h264_nvenc"s,
   },
 #ifdef _WIN32
-  DEFAULT,
+  FORCE_CALLBACK,
   dxgi_make_hwdevice_ctx
 #else
   PARALLEL_ENCODING,
@@ -474,7 +475,7 @@ static encoder_t amdvce {
     std::make_optional({ "qp_p"s, &config::video.qp }),
     "h264_amf"s,
   },
-  DEFAULT,
+  FORCE_CALLBACK,
   dxgi_make_hwdevice_ctx
 };
 #endif
@@ -931,7 +932,7 @@ std::optional make_session(const encoder_t &encoder, const config_t &
     }
 
     hwdevice_ctx = std::move(buf_or_error.left());
-    if(hwframe_ctx(ctx, hwdevice_ctx, sw_fmt)) {
+    if(hwframe_ctx(ctx, hwdevice_ctx, sw_fmt, (encoder.name == "amdvce"sv))) {
       return std::nullopt;
     }
 
@@ -1404,6 +1405,7 @@ void capture(
   void *channel_data) {
 
   auto idr_events = mail->event(mail::idr);
+  force_callback = encoders.front().flags & FORCE_CALLBACK;
 
   idr_events->raise(true);
   if(encoders.front().flags & PARALLEL_ENCODING) {
@@ -1680,7 +1682,7 @@ int init() {
   return 0;
 }
 
-int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) {
+int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format, bool amf_lowlatency) {
   buffer_t frame_ref { av_hwframe_ctx_alloc(hwdevice.get()) };
 
   auto frame_ctx = (AVHWFramesContext *)frame_ref->data;
@@ -1694,6 +1696,13 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) {
     return err;
   }
 
+  if(amf_lowlatency) {
+    // shrink the amf encoder's hw frame pool to minimize buffered frames
+    // NOTE(review): this comment previously read "16 -> 2" while the value assigned is 3 -- confirm which is intended
+    // note: pool size is deliberately set after initialization
+    frame_ctx->initial_pool_size = 3;
+  }
+
   ctx->hw_frames_ctx = av_buffer_ref(frame_ref.get());
 
   return 0;