From 85addc10a245ff17fbfe7f8e8f535e53c47c8136 Mon Sep 17 00:00:00 2001 From: Brad Richardson Date: Wed, 21 Dec 2022 21:11:43 -0500 Subject: [PATCH 1/3] QSV Co-authored-by: Caio Vidal --- .github/workflows/CI.yml | 2 + CMakeLists.txt | 4 +- src/config.cpp | 31 ++++++++++ src/config.h | 6 ++ src/video.cpp | 118 +++++++++++++++++++++++++++++++++------ 5 files changed, 141 insertions(+), 20 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index f9821dce247..8c276ba3e34 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -430,6 +430,7 @@ jobs: libcurl4-openssl-dev \ libdrm-dev \ libevdev-dev \ + libmfx-dev \ libnuma-dev \ libopus-dev \ libpulse-dev \ @@ -885,6 +886,7 @@ jobs: mingw-w64-x86_64-binutils mingw-w64-x86_64-boost mingw-w64-x86_64-cmake + mingw-w64-x86_64-libmfx mingw-w64-x86_64-nsis mingw-w64-x86_64-openssl mingw-w64-x86_64-opus diff --git a/CMakeLists.txt b/CMakeLists.txt index b192e00828a..bd7863fc5f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -387,8 +387,8 @@ set_source_files_properties(src/upnp.cpp PROPERTIES COMPILE_FLAGS -Wno-pedantic) # Pre-compiled binaries if(WIN32) - set(FFMPEG_PREPARED_BINARIES "${CMAKE_CURRENT_SOURCE_DIR}/third-party/ffmpeg-windows-x86_64") - set(FFMPEG_PLATFORM_LIBRARIES mfplat ole32 strmiids mfuuid) + set(FFMPEG_PREPARED_BINARIES "${CMAKE_CURRENT_SOURCE_DIR}/third-party/test/ffmpeg-windows-x86_64") + set(FFMPEG_PLATFORM_LIBRARIES mfplat ole32 strmiids mfuuid mfx) elseif(APPLE) if (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") set(FFMPEG_PREPARED_BINARIES "${CMAKE_CURRENT_SOURCE_DIR}/third-party/ffmpeg-macos-aarch64") diff --git a/src/config.cpp b/src/config.cpp index 5bc04159bb8..b9bef7e270f 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -1,8 +1,10 @@ #include +#include #include #include #include #include +#include #include #include @@ -200,6 +202,26 @@ int coder_from_view(const std::string_view &coder) { } } // namespace amd +namespace qsv { +enum preset_e : int { + _default = 4, + veryslow = 1, + slower = 2, + slow = 3, + medium = 4, + fast = 5, + faster = 6, + veryfast = 7 +}; + +enum cavlc_e : int { + _auto = false, + enabled = true, + disabled = false +}; + +} // namespace qsv + namespace vt { enum coder_e : int { @@ -261,6 +283,11 @@ video_t video { (int)amd::rc_hevc_e::vbr_latency, // rate control (hevc) (int)amd::coder_e::_auto, // coder }, // amd + { + qsv::medium, + 0, + "" }, // qsv + { 0, 0, @@ -776,6 +803,10 @@ void apply_config(std::unordered_map &&vars) { video.amd.rc_hevc = amd::rc_from_view(rc, 0); } + int_f(vars, "qsv_preset", video.qsv.preset); + int_f(vars, "qsv_cavlc", video.qsv.cavlc); + string_f(vars, "qsv_child_device", video.qsv.child_device); + int_f(vars, "vt_coder", video.vt.coder, vt::coder_from_view); int_f(vars, "vt_software", video.vt.allow_sw, vt::allow_software_from_view); int_f(vars, "vt_software", video.vt.require_sw, vt::force_software_from_view); diff --git a/src/config.h b/src/config.h index 7b1c705ec90..2d9f314aa04 100644 --- a/src/config.h +++ b/src/config.h @@ -36,6 +36,12 @@ struct video_t { int coder; } amd; + struct { + std::optional preset; + std::optional cavlc; + std::string child_device; + } qsv; + struct { int allow_sw; int require_sw; diff --git a/src/video.cpp b/src/video.cpp index 9529ce1f524..7c0d25dd83c 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -20,6 +20,7 @@ extern "C" { #ifdef _WIN32 extern "C" { #include +#include } #endif @@ -70,6 +71,7 @@ platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt); util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); util::Either cuda_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); +util::Either qsv_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format); @@ -317,8 +319,11 @@ struct encoder_t { int flags; std::function(platf::hwdevice_t *hwdevice)> make_hwdevice_ctx; + int hwframe_initial_pool_size; }; +int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format, const encoder_t &encoder); + class session_t { public: session_t() = default; @@ -447,6 +452,8 @@ static encoder_t nvenc { #else cuda_make_hwdevice_ctx #endif + , + -1 }; #ifdef _WIN32 @@ -486,10 +493,40 @@ static encoder_t amdvce { "h264_amf"s, }, PARALLEL_ENCODING, - dxgi_make_hwdevice_ctx + dxgi_make_hwdevice_ctx, + -1 }; #endif +static encoder_t quicksync { + "quicksync"sv, + { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN }, + AV_HWDEVICE_TYPE_QSV, + AV_PIX_FMT_QSV, + AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + { + { + { "forced_idr"s, "1" }, + { "preset"s, &config::video.qsv.preset }, + }, + std::make_optional({ "qp"s, &config::video.qp }), + "hevc_qsv"s, + }, + { + { + { "preset"s, &config::video.qsv.preset }, + { "cavlc"s, &config::video.qsv.cavlc }, + { "forced_idr"s, "1" }, + { "async_depth"s, "1" } + }, + std::make_optional({ "qp"s, &config::video.qp }), + "h264_qsv"s, + }, + PARALLEL_ENCODING, + qsv_make_hwdevice_ctx, + 20 +}; + static encoder_t software { "software"sv, { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10 }, @@ -519,8 +556,8 @@ static encoder_t software { "libx264"s, }, H264_ONLY | PARALLEL_ENCODING, - - nullptr + nullptr, + -1 }; #ifdef __linux__ @@ -550,7 +587,8 @@ static encoder_t vaapi { }, LIMITED_GOP_SIZE | PARALLEL_ENCODING | SINGLE_SLICE_ONLY, - vaapi_make_hwdevice_ctx + vaapi_make_hwdevice_ctx, + -1 }; #endif @@ -581,13 +619,15 @@ static encoder_t videotoolbox { }, DEFAULT, - nullptr + nullptr, + -1 }; #endif static std::vector encoders { #ifndef __APPLE__ nvenc, + quicksync, #endif #ifdef _WIN32 amdvce, @@ -944,7 +984,7 @@ std::optional make_session(const encoder_t &encoder, const config_t & } hwdevice_ctx = std::move(buf_or_error.left()); - if(hwframe_ctx(ctx, hwdevice_ctx, sw_fmt)) { + if(hwframe_ctx(ctx, hwdevice_ctx, sw_fmt, encoder)) { return std::nullopt; } @@ -1049,6 +1089,9 @@ std::optional make_session(const encoder_t &encoder, const config_t & if(!video_format[encoder_t::NALU_PREFIX_5b]) { auto nalu_prefix = config.videoFormat ? hevc_nalu : h264_nalu; + session.replacements.emplace_back("\000\000\000\001\'"sv, "\000\000\000\001g"sv); //sps + session.replacements.emplace_back("\000\000\000\001("sv, "\000\000\000\001h"sv); //pps + session.replacements.emplace_back("\000\000\001%"sv, "\000\000\001e"sv); //idr session.replacements.emplace_back(nalu_prefix.substr(1), nalu_prefix); } @@ -1455,25 +1498,30 @@ enum validate_flag_e { int validate_config(std::shared_ptr &disp, const encoder_t &encoder, const config_t &config) { reset_display(disp, encoder.dev_type, config::video.output_name, config.framerate); if(!disp) { + BOOST_LOG(verbose) << "Failed to reset display"; return -1; } auto pix_fmt = config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt); auto hwdevice = disp->make_hwdevice(pix_fmt); if(!hwdevice) { + BOOST_LOG(verbose) << "Failed to make hwdevice"; return -1; } auto session = make_session(encoder, config, disp->width, disp->height, std::move(hwdevice)); if(!session) { + BOOST_LOG(verbose) << "Failed to make session"; return -1; } auto img = disp->alloc_img(); if(!img || disp->dummy_img(img.get())) { + BOOST_LOG(verbose) << "Failed to create dummy image"; return -1; } if(session->device->convert(*img)) { + BOOST_LOG(verbose) << "Failed to convert image"; return -1; } @@ -1531,7 +1579,9 @@ bool validate_encoder(encoder_t &encoder) { config_t config_autoselect { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0 }; retry: + BOOST_LOG(verbose) << "Validating max ref frames config"; auto max_ref_frames_h264 = validate_config(disp, encoder, config_max_ref_frames); + BOOST_LOG(verbose) << "Validating autoselect config"; auto autoselect_h264 = validate_config(disp, encoder, config_autoselect); if(max_ref_frames_h264 < 0 && autoselect_h264 < 0) { @@ -1541,6 +1591,7 @@ bool validate_encoder(encoder_t &encoder) { encoder.h264[encoder_t::CBR] = false; goto retry; } + BOOST_LOG(verbose) << "Failed after disabling CBR"; return false; } @@ -1550,6 +1601,7 @@ bool validate_encoder(encoder_t &encoder) { }; for(auto [validate_flag, encoder_flag] : packet_deficiencies) { + BOOST_LOG(verbose) << "Validating: " << validate_flag << " | " << encoder_flag; encoder.h264[encoder_flag] = (max_ref_frames_h264 & validate_flag && autoselect_h264 & validate_flag); } @@ -1699,15 +1751,18 @@ int init() { return 0; } -int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) { +int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format, const encoder_t &encoder) { buffer_t frame_ref { av_hwframe_ctx_alloc(hwdevice.get()) }; - auto frame_ctx = (AVHWFramesContext *)frame_ref->data; - frame_ctx->format = ctx->pix_fmt; - frame_ctx->sw_format = format; - frame_ctx->height = ctx->height; - frame_ctx->width = ctx->width; - frame_ctx->initial_pool_size = 0; + auto frame_ctx = (AVHWFramesContext *)frame_ref->data; + frame_ctx->format = ctx->pix_fmt; + frame_ctx->sw_format = format; + frame_ctx->height = ctx->height; + frame_ctx->width = ctx->width; + + if(encoder.hwframe_initial_pool_size >= 0) { + frame_ctx->initial_pool_size = encoder.hwframe_initial_pool_size; + } if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) { return err; @@ -1719,14 +1774,14 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) { } // Linux only declaration -typedef int (*vaapi_make_hwdevice_ctx_fn)(platf::hwdevice_t *base, AVBufferRef **hw_device_buf); +typedef int (*vaapi_make_hwdevice_ctx_fn)(platf::hwdevice_t *hwdevice_ctx, AVBufferRef **hw_device_buf); -util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *base) { +util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { buffer_t hw_device_buf; // If an egl hwdevice - if(base->data) { - if(((vaapi_make_hwdevice_ctx_fn)base->data)(base, &hw_device_buf)) { + if(hwdevice_ctx->data) { + if(((vaapi_make_hwdevice_ctx_fn)hwdevice_ctx->data)(hwdevice_ctx, &hw_device_buf)) { return -1; } @@ -1745,7 +1800,7 @@ util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *base) { return hw_device_buf; } -util::Either cuda_make_hwdevice_ctx(platf::hwdevice_t *base) { +util::Either cuda_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { buffer_t hw_device_buf; auto status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_CUDA, nullptr, nullptr, 1 /* AV_CUDA_USE_PRIMARY_CONTEXT */); @@ -1789,6 +1844,32 @@ util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_c return ctx_buf; } + +util::Either qsv_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { + + AVBufferRef *hw_device_ctx = NULL; + + AVDictionary *child_device_opts = NULL; + + if(!config::video.qsv.child_device.empty()) { + av_dict_set(&child_device_opts, "child_device", config::video.qsv.child_device.data(), 0); + } + + auto err = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_QSV, NULL, child_device_opts, 0); + + if(err) { + char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 }; + BOOST_LOG(error) << "Failed to create FFMpeg hardware device context: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); + + return err; + } + + buffer_t ctx_buf { hw_device_ctx }; + + return ctx_buf; +} + + #endif int start_capture_async(capture_thread_async_ctx_t &capture_thread_ctx) { @@ -1822,6 +1903,7 @@ void end_capture_sync(capture_thread_sync_ctx_t &ctx) {} platf::mem_type_e map_dev_type(AVHWDeviceType type) { switch(type) { case AV_HWDEVICE_TYPE_D3D11VA: + case AV_HWDEVICE_TYPE_QSV: return platf::mem_type_e::dxgi; case AV_HWDEVICE_TYPE_VAAPI: return platf::mem_type_e::vaapi; From 05f030d151b591a734350848630e39ec59637ade Mon Sep 17 00:00:00 2001 From: Brad Richardson Date: Thu, 22 Dec 2022 23:52:33 -0500 Subject: [PATCH 2/3] Pulling more changes from previous PR notes --- src/config.cpp | 2 +- src/platform/windows/display_base.cpp | 8 ++++++++ src/platform/windows/display_vram.cpp | 4 ++++ src/video.cpp | 22 +++++++++------------- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index b9bef7e270f..93b441275c4 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -285,7 +285,7 @@ video_t video { }, // amd { qsv::medium, - 0, + qsv::disabled, "" }, // qsv { diff --git a/src/platform/windows/display_base.cpp b/src/platform/windows/display_base.cpp index 9f8c2705957..04ee8ee9348 100644 --- a/src/platform/windows/display_base.cpp +++ b/src/platform/windows/display_base.cpp @@ -232,6 +232,14 @@ int display_base_t::init(int framerate, const std::string &display_name) { } dup.use_dwmflush = config::video.dwmflush && !(framerate > refresh_rate) ? true : false; + + ID3D10Multithread *pMultithread; + + status = device->QueryInterface(IID_ID3D10Multithread, (void **)&pMultithread); + if(SUCCEEDED(status)) { + pMultithread->SetMultithreadProtected(TRUE); + Release(pMultithread); + } // Bump up thread priority { diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index 75e77b6e23a..109990ea8c7 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -410,6 +410,10 @@ class hwdevice_t : public platf::hwdevice_t { frame->height = img.height; frame->width = img.width; + AVFrame* qsv_frame = av_frame_alloc(); + qsv_frame->format = AV_PIX_FMT_QSV; + av_hwframe_map(qsv_frame, frame, AV_HWFRAME_MAP_READ); + return 0; } diff --git a/src/video.cpp b/src/video.cpp index 7c0d25dd83c..c0f2389b05d 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -1089,9 +1089,6 @@ std::optional make_session(const encoder_t &encoder, const config_t & if(!video_format[encoder_t::NALU_PREFIX_5b]) { auto nalu_prefix = config.videoFormat ? hevc_nalu : h264_nalu; - session.replacements.emplace_back("\000\000\000\001\'"sv, "\000\000\000\001g"sv); //sps - session.replacements.emplace_back("\000\000\000\001("sv, "\000\000\000\001h"sv); //pps - session.replacements.emplace_back("\000\000\001%"sv, "\000\000\001e"sv); //idr session.replacements.emplace_back(nalu_prefix.substr(1), nalu_prefix); } @@ -1498,30 +1495,25 @@ enum validate_flag_e { int validate_config(std::shared_ptr &disp, const encoder_t &encoder, const config_t &config) { reset_display(disp, encoder.dev_type, config::video.output_name, config.framerate); if(!disp) { - BOOST_LOG(verbose) << "Failed to reset display"; return -1; } auto pix_fmt = config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt); auto hwdevice = disp->make_hwdevice(pix_fmt); if(!hwdevice) { - BOOST_LOG(verbose) << "Failed to make hwdevice"; return -1; } auto session = make_session(encoder, config, disp->width, disp->height, std::move(hwdevice)); if(!session) { - BOOST_LOG(verbose) << "Failed to make session"; return -1; } auto img = disp->alloc_img(); if(!img || disp->dummy_img(img.get())) { - BOOST_LOG(verbose) << "Failed to create dummy image"; return -1; } if(session->device->convert(*img)) { - BOOST_LOG(verbose) << "Failed to convert image"; return -1; } @@ -1579,9 +1571,7 @@ bool validate_encoder(encoder_t &encoder) { config_t config_autoselect { 1920, 1080, 60, 1000, 1, 0, 1, 0, 0 }; retry: - BOOST_LOG(verbose) << "Validating max ref frames config"; auto max_ref_frames_h264 = validate_config(disp, encoder, config_max_ref_frames); - BOOST_LOG(verbose) << "Validating autoselect config"; auto autoselect_h264 = validate_config(disp, encoder, config_autoselect); if(max_ref_frames_h264 < 0 && autoselect_h264 < 0) { @@ -1591,7 +1581,6 @@ bool validate_encoder(encoder_t &encoder) { encoder.h264[encoder_t::CBR] = false; goto retry; } - BOOST_LOG(verbose) << "Failed after disabling CBR"; return false; } @@ -1601,7 +1590,6 @@ bool validate_encoder(encoder_t &encoder) { }; for(auto [validate_flag, encoder_flag] : packet_deficiencies) { - BOOST_LOG(verbose) << "Validating: " << validate_flag << " | " << encoder_flag; encoder.h264[encoder_flag] = (max_ref_frames_h264 & validate_flag && autoselect_h264 & validate_flag); } @@ -1855,7 +1843,15 @@ util::Either qsv_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ct av_dict_set(&child_device_opts, "child_device", config::video.qsv.child_device.data(), 0); } - auto err = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_QSV, NULL, child_device_opts, 0); + auto buf_or_error = dxgi_make_hwdevice_ctx(hwdevice_ctx); + if(buf_or_error.has_right()) { + return buf_or_error.right(); + } + + auto dxgi_hwdevice_ctx = std::move(buf_or_error.left().get()); + + // auto err = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_QSV, NULL, child_device_opts, 0); + auto err = av_hwdevice_ctx_create_derived(&hw_device_ctx, AV_HWDEVICE_TYPE_QSV, dxgi_hwdevice_ctx, 0); if(err) { char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 }; From 5f91ce5d84607297b1d452033c14bfe5381e8ea3 Mon Sep 17 00:00:00 2001 From: Brad Richardson Date: Wed, 28 Dec 2022 20:26:16 -0500 Subject: [PATCH 3/3] Passing encoder checks (poorly) --- src/platform/windows/display_vram.cpp | 6 ++++++ src/video.cpp | 12 ++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index 109990ea8c7..30189d79fc0 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -410,6 +410,12 @@ class hwdevice_t : public platf::hwdevice_t { frame->height = img.height; frame->width = img.width; + // This resets the frame and produces bad output but does allow us to pass encoder checks +// if(av_hwframe_get_buffer(frame->hw_frames_ctx, frame, 0)) { +// BOOST_LOG(error) << "Couldn't get hwframe for QSV"sv; +// return -1; +// } + AVFrame* qsv_frame = av_frame_alloc(); qsv_frame->format = AV_PIX_FMT_QSV; av_hwframe_map(qsv_frame, frame, AV_HWFRAME_MAP_READ); diff --git a/src/video.cpp b/src/video.cpp index c0f2389b05d..aa08e10b0e9 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -20,7 +20,6 @@ extern "C" { #ifdef _WIN32 extern "C" { #include -#include } #endif @@ -1835,9 +1834,8 @@ util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_c util::Either qsv_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { - AVBufferRef *hw_device_ctx = NULL; - - AVDictionary *child_device_opts = NULL; + AVBufferRef *hw_device_ctx = nullptr; + AVDictionary *child_device_opts = nullptr; if(!config::video.qsv.child_device.empty()) { av_dict_set(&child_device_opts, "child_device", config::video.qsv.child_device.data(), 0); @@ -1848,10 +1846,8 @@ util::Either qsv_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ct return buf_or_error.right(); } - auto dxgi_hwdevice_ctx = std::move(buf_or_error.left().get()); - - // auto err = av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_QSV, NULL, child_device_opts, 0); - auto err = av_hwdevice_ctx_create_derived(&hw_device_ctx, AV_HWDEVICE_TYPE_QSV, dxgi_hwdevice_ctx, 0); + auto dxgi_hwdevice_ctx = buf_or_error.left().get(); + auto err = av_hwdevice_ctx_create_derived_opts(&hw_device_ctx, AV_HWDEVICE_TYPE_QSV, dxgi_hwdevice_ctx, child_device_opts, 0); if(err) { char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };