diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index f9821dce247..8c276ba3e34 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -430,6 +430,7 @@ jobs: libcurl4-openssl-dev \ libdrm-dev \ libevdev-dev \ + libmfx-dev \ libnuma-dev \ libopus-dev \ libpulse-dev \ @@ -885,6 +886,7 @@ jobs: mingw-w64-x86_64-binutils mingw-w64-x86_64-boost mingw-w64-x86_64-cmake + mingw-w64-x86_64-libmfx mingw-w64-x86_64-nsis mingw-w64-x86_64-openssl mingw-w64-x86_64-opus diff --git a/CMakeLists.txt b/CMakeLists.txt index b192e00828a..bd7863fc5f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -387,8 +387,8 @@ set_source_files_properties(src/upnp.cpp PROPERTIES COMPILE_FLAGS -Wno-pedantic) # Pre-compiled binaries if(WIN32) - set(FFMPEG_PREPARED_BINARIES "${CMAKE_CURRENT_SOURCE_DIR}/third-party/ffmpeg-windows-x86_64") - set(FFMPEG_PLATFORM_LIBRARIES mfplat ole32 strmiids mfuuid) + set(FFMPEG_PREPARED_BINARIES "${CMAKE_CURRENT_SOURCE_DIR}/third-party/test/ffmpeg-windows-x86_64") + set(FFMPEG_PLATFORM_LIBRARIES mfplat ole32 strmiids mfuuid mfx) elseif(APPLE) if (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") set(FFMPEG_PREPARED_BINARIES "${CMAKE_CURRENT_SOURCE_DIR}/third-party/ffmpeg-macos-aarch64") diff --git a/src/config.cpp b/src/config.cpp index 5bc04159bb8..93b441275c4 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -1,8 +1,10 @@ #include +#include #include #include #include #include +#include #include #include @@ -200,6 +202,26 @@ int coder_from_view(const std::string_view &coder) { } } // namespace amd +namespace qsv { +enum preset_e : int { + _default = 4, + veryslow = 1, + slower = 2, + slow = 3, + medium = 4, + fast = 5, + faster = 6, + veryfast = 7 +}; + +enum cavlc_e : int { + _auto = false, + enabled = true, + disabled = false +}; + +} // namespace qsv + namespace vt { enum coder_e : int { @@ -261,6 +283,11 @@ video_t video { (int)amd::rc_hevc_e::vbr_latency, // rate control (hevc) (int)amd::coder_e::_auto, // coder }, // amd + { + qsv::medium, + qsv::disabled, + "" }, // qsv + { 0, 0, @@ -776,6 +803,10 @@ void apply_config(std::unordered_map &&vars) { video.amd.rc_hevc = amd::rc_from_view(rc, 0); } + int_f(vars, "qsv_preset", video.qsv.preset); + int_f(vars, "qsv_cavlc", video.qsv.cavlc); + string_f(vars, "qsv_child_device", video.qsv.child_device); + int_f(vars, "vt_coder", video.vt.coder, vt::coder_from_view); int_f(vars, "vt_software", video.vt.allow_sw, vt::allow_software_from_view); int_f(vars, "vt_software", video.vt.require_sw, vt::force_software_from_view); diff --git a/src/config.h b/src/config.h index 7b1c705ec90..2d9f314aa04 100644 --- a/src/config.h +++ b/src/config.h @@ -36,6 +36,12 @@ struct video_t { int coder; } amd; + struct { + std::optional preset; + std::optional cavlc; + std::string child_device; + } qsv; + struct { int allow_sw; int require_sw; diff --git a/src/platform/windows/display_base.cpp b/src/platform/windows/display_base.cpp index 9f8c2705957..04ee8ee9348 100644 --- a/src/platform/windows/display_base.cpp +++ b/src/platform/windows/display_base.cpp @@ -232,6 +232,14 @@ int display_base_t::init(int framerate, const std::string &display_name) { } dup.use_dwmflush = config::video.dwmflush && !(framerate > refresh_rate) ? true : false; + + ID3D10Multithread *pMultithread; + + status = device->QueryInterface(IID_ID3D10Multithread, (void **)&pMultithread); + if(SUCCEEDED(status)) { + pMultithread->SetMultithreadProtected(TRUE); + Release(pMultithread); + } // Bump up thread priority { diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index 75e77b6e23a..30189d79fc0 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -410,6 +410,16 @@ class hwdevice_t : public platf::hwdevice_t { frame->height = img.height; frame->width = img.width; + // This resets the frame and produces bad output but does allow us to pass encoder checks +// if(av_hwframe_get_buffer(frame->hw_frames_ctx, frame, 0)) { +// BOOST_LOG(error) << "Couldn't get hwframe for QSV"sv; +// return -1; +// } + + AVFrame* qsv_frame = av_frame_alloc(); + qsv_frame->format = AV_PIX_FMT_QSV; + av_hwframe_map(qsv_frame, frame, AV_HWFRAME_MAP_READ); + return 0; } diff --git a/src/video.cpp b/src/video.cpp index 9529ce1f524..aa08e10b0e9 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -70,6 +70,7 @@ platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt); util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); util::Either cuda_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); +util::Either qsv_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format); @@ -317,8 +318,11 @@ struct encoder_t { int flags; std::function(platf::hwdevice_t *hwdevice)> make_hwdevice_ctx; + int hwframe_initial_pool_size; }; +int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format, const encoder_t &encoder); + class session_t { public: session_t() = default; @@ -447,6 +451,8 @@ static encoder_t nvenc { #else cuda_make_hwdevice_ctx #endif + , + -1 }; #ifdef _WIN32 @@ -486,10 +492,40 @@ static encoder_t amdvce { "h264_amf"s, }, PARALLEL_ENCODING, - dxgi_make_hwdevice_ctx + dxgi_make_hwdevice_ctx, + -1 }; #endif +static encoder_t quicksync { + "quicksync"sv, + { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN }, + AV_HWDEVICE_TYPE_QSV, + AV_PIX_FMT_QSV, + AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + { + { + { "forced_idr"s, "1" }, + { "preset"s, &config::video.qsv.preset }, + }, + std::make_optional({ "qp"s, &config::video.qp }), + "hevc_qsv"s, + }, + { + { + { "preset"s, &config::video.qsv.preset }, + { "cavlc"s, &config::video.qsv.cavlc }, + { "forced_idr"s, "1" }, + { "async_depth"s, "1" } + }, + std::make_optional({ "qp"s, &config::video.qp }), + "h264_qsv"s, + }, + PARALLEL_ENCODING, + qsv_make_hwdevice_ctx, + 20 +}; + static encoder_t software { "software"sv, { FF_PROFILE_H264_HIGH, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN_10 }, @@ -519,8 +555,8 @@ static encoder_t software { "libx264"s, }, H264_ONLY | PARALLEL_ENCODING, - - nullptr + nullptr, + -1 }; #ifdef __linux__ @@ -550,7 +586,8 @@ static encoder_t vaapi { }, LIMITED_GOP_SIZE | PARALLEL_ENCODING | SINGLE_SLICE_ONLY, - vaapi_make_hwdevice_ctx + vaapi_make_hwdevice_ctx, + -1 }; #endif @@ -581,13 +618,15 @@ static encoder_t videotoolbox { }, DEFAULT, - nullptr + nullptr, + -1 }; #endif static std::vector encoders { #ifndef __APPLE__ nvenc, + quicksync, #endif #ifdef _WIN32 amdvce, @@ -944,7 +983,7 @@ std::optional make_session(const encoder_t &encoder, const config_t & } hwdevice_ctx = std::move(buf_or_error.left()); - if(hwframe_ctx(ctx, hwdevice_ctx, sw_fmt)) { + if(hwframe_ctx(ctx, hwdevice_ctx, sw_fmt, encoder)) { return std::nullopt; } @@ -1699,15 +1738,18 @@ int init() { return 0; } -int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) { +int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format, const encoder_t &encoder) { buffer_t frame_ref { av_hwframe_ctx_alloc(hwdevice.get()) }; - auto frame_ctx = (AVHWFramesContext *)frame_ref->data; - frame_ctx->format = ctx->pix_fmt; - frame_ctx->sw_format = format; - frame_ctx->height = ctx->height; - frame_ctx->width = ctx->width; - frame_ctx->initial_pool_size = 0; + auto frame_ctx = (AVHWFramesContext *)frame_ref->data; + frame_ctx->format = ctx->pix_fmt; + frame_ctx->sw_format = format; + frame_ctx->height = ctx->height; + frame_ctx->width = ctx->width; + + if(encoder.hwframe_initial_pool_size >= 0) { + frame_ctx->initial_pool_size = encoder.hwframe_initial_pool_size; + } if(auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) { return err; @@ -1719,14 +1761,14 @@ int hwframe_ctx(ctx_t &ctx, buffer_t &hwdevice, AVPixelFormat format) { } // Linux only declaration -typedef int (*vaapi_make_hwdevice_ctx_fn)(platf::hwdevice_t *base, AVBufferRef **hw_device_buf); +typedef int (*vaapi_make_hwdevice_ctx_fn)(platf::hwdevice_t *hwdevice_ctx, AVBufferRef **hw_device_buf); -util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *base) { +util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { buffer_t hw_device_buf; // If an egl hwdevice - if(base->data) { - if(((vaapi_make_hwdevice_ctx_fn)base->data)(base, &hw_device_buf)) { + if(hwdevice_ctx->data) { + if(((vaapi_make_hwdevice_ctx_fn)hwdevice_ctx->data)(hwdevice_ctx, &hw_device_buf)) { return -1; } @@ -1745,7 +1787,7 @@ util::Either vaapi_make_hwdevice_ctx(platf::hwdevice_t *base) { return hw_device_buf; } -util::Either cuda_make_hwdevice_ctx(platf::hwdevice_t *base) { +util::Either cuda_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { buffer_t hw_device_buf; auto status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_CUDA, nullptr, nullptr, 1 /* AV_CUDA_USE_PRIMARY_CONTEXT */); @@ -1789,6 +1831,37 @@ util::Either dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_c return ctx_buf; } + +util::Either qsv_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) { + + AVBufferRef *hw_device_ctx = nullptr; + AVDictionary *child_device_opts = nullptr; + + if(!config::video.qsv.child_device.empty()) { + av_dict_set(&child_device_opts, "child_device", config::video.qsv.child_device.data(), 0); + } + + auto buf_or_error = dxgi_make_hwdevice_ctx(hwdevice_ctx); + if(buf_or_error.has_right()) { + return buf_or_error.right(); + } + + auto dxgi_hwdevice_ctx = buf_or_error.left().get(); + auto err = av_hwdevice_ctx_create_derived_opts(&hw_device_ctx, AV_HWDEVICE_TYPE_QSV, dxgi_hwdevice_ctx, child_device_opts, 0); + + if(err) { + char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 }; + BOOST_LOG(error) << "Failed to create FFMpeg hardware device context: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); + + return err; + } + + buffer_t ctx_buf { hw_device_ctx }; + + return ctx_buf; +} + + #endif int start_capture_async(capture_thread_async_ctx_t &capture_thread_ctx) { @@ -1822,6 +1895,7 @@ void end_capture_sync(capture_thread_sync_ctx_t &ctx) {} platf::mem_type_e map_dev_type(AVHWDeviceType type) { switch(type) { case AV_HWDEVICE_TYPE_D3D11VA: + case AV_HWDEVICE_TYPE_QSV: return platf::mem_type_e::dxgi; case AV_HWDEVICE_TYPE_VAAPI: return platf::mem_type_e::vaapi;