Add depth estimation model (#366)
* add model file

* add model

* blur focus

* blur effect

* lint

* readme

* fix ini

* focal blur

* conversion

* lint

* fix shader
royshil authored Jul 4, 2023
1 parent 1d521f9 commit 04ba710
Showing 9 changed files with 124 additions and 41 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -25,3 +25,5 @@

# Exclude CMake build number cache
/cmake/.CMakeBuildNumber

src/*.generated.*
5 changes: 3 additions & 2 deletions README.md
@@ -16,7 +16,7 @@ A plugin for [OBS Studio](https://obsproject.com/) that allows you to replace th
- [MacOSX](#mac-osx)
- [Linux (Ubuntu, Arch, openSUSE)](#linux)
- [Windows](#windows)

🚧 Check out our experimental [CleanStream](https://github.com/royshil/obs-cleanstream) OBS plugin for real-time filler-word (uh, um) and profanity removal from a live audio stream 🚧

## Download
@@ -78,14 +78,15 @@ The pretrained model weights used for portrait foreground segmentation are taken
- https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.7/contrib/PP-HumanSeg
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/082_MediaPipe_Meet_Segmentation
- https://github.com/PeterL1n/RobustVideoMatting
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/384_TCMonoDepth and https://github.com/yu-li/TCMonoDepth

Image enhancement (low light) models are taken from:
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/213_TBEFN
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/372_URetinex-Net
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/370_Semantic-Guided-Low-Light-Image-Enhancement
- https://github.com/PINTO0309/PINTO_model_zoo/tree/main/243_Zero-DCE-improved

Some more information about how I built it: https://www.morethantechnical.com/2021/04/15/obs-plugin-for-portrait-background-removal-with-onnx-sinet-model/
Some more information about how I built it: https://www.morethantechnical.com/2021/04/15/obs-plugin-for-portrait-background-removal-with-onnx-sinet-model/ and https://www.morethantechnical.com/2023/05/20/building-an-obs-background-removal-plugin-a-walkthrough/

## Building

34 changes: 32 additions & 2 deletions data/effects/kawase_blur.effect
@@ -1,9 +1,14 @@
uniform float4x4 ViewProj;
uniform texture2d image;
uniform texture2d focalmask; // focal (depth) mask

uniform float xOffset;
uniform float yOffset;

uniform int blurIter; // Current blur iteration
uniform int blurTotal; // Total number of blur iterations
uniform float blurFocusPoint; // Focus point for the blur. 0 = back, 1 = front

sampler_state textureSampler {
Filter = Linear;
AddressU = Clamp;
@@ -28,8 +33,33 @@ VertDataOut VSDefault(VertDataOut v_in)
return vert_out;
}

float4 PSKawaseBlur(VertDataOut v_in) : TARGET
/**
* Kawase focal blur
* The blur amount for each pixel is the difference between its estimated
* depth and the focus point. The focus point is a value between 0 and 1,
* where 0 is the back of the image and 1 is the front.
*/
float4 PSKawaseFocalBlur(VertDataOut v_in) : TARGET
{
float blurIterF = float(blurIter) / float(blurTotal);

// Blur the focal mask to get a smoother value; otherwise aliasing occurs
float blurValue = focalmask.Sample(textureSampler, v_in.uv).r;
blurValue += focalmask.Sample(textureSampler, v_in.uv + float2( 0.01, 0.01)).r;
blurValue += focalmask.Sample(textureSampler, v_in.uv + float2(-0.01, 0.01)).r;
blurValue += focalmask.Sample(textureSampler, v_in.uv + float2( 0.01, -0.01)).r;
blurValue += focalmask.Sample(textureSampler, v_in.uv + float2(-0.01, -0.01)).r;
blurValue *= 0.25;

// Calculate the distance from the focus point for this pixel
float blurFocusDistance = clamp(abs(blurValue - blurFocusPoint), 0.0, 1.0);

if (blurIterF > blurFocusDistance) {
// Past this pixel's blur distance: return the pixel as-is, don't blur further
return image.Sample(textureSampler, v_in.uv);
}

// Calculate the blur value from neighboring pixels
float4 sum = float4(0.0, 0.0, 0.0, 0.0);
sum += image.Sample(textureSampler, v_in.uv + float2( xOffset, yOffset));
sum += image.Sample(textureSampler, v_in.uv + float2(-xOffset, yOffset));
@@ -44,6 +74,6 @@ technique Draw
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSKawaseBlur(v_in);
pixel_shader = PSKawaseFocalBlur(v_in);
}
}
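To make the focal-blur logic above concrete, here is a minimal, standalone C++ sketch (not part of the commit; `iterationsApplied` is a hypothetical name) of the per-pixel decision `PSKawaseFocalBlur` makes: a pixel keeps receiving blur passes only while the normalized iteration index stays below its distance from the focus plane.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// depth: smoothed focal-mask sample in [0, 1] (0 = back, 1 = front).
// focusPoint: mirrors the blurFocusPoint uniform.
// blurTotal: mirrors the blurTotal uniform (total number of passes).
int iterationsApplied(float depth, float focusPoint, int blurTotal)
{
	// Same clamp(abs(...)) as the shader's blurFocusDistance
	float focusDistance =
		std::clamp(std::fabs(depth - focusPoint), 0.0f, 1.0f);
	int applied = 0;
	for (int i = 0; i < blurTotal; i++) {
		if ((float)i / (float)blurTotal > focusDistance)
			break; // mirrors the shader's early return
		applied++;
	}
	return applied;
}

int main()
{
	// Focus on the front (1.0) with 10 passes: a far pixel
	// (depth 0.1) receives all 10, a near pixel (depth 0.9) only 2.
	std::printf("far:  %d\n", iterationsApplied(0.1f, 1.0f, 10));
	std::printf("near: %d\n", iterationsApplied(0.9f, 1.0f, 10));
}
```

Pixels near the focus plane drop out of the blur loop early while distant pixels keep accumulating passes, which is what produces the depth-of-field falloff.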
13 changes: 2 additions & 11 deletions data/effects/mask_alpha_filter.effect
@@ -2,9 +2,6 @@ uniform float4x4 ViewProj;

uniform texture2d image; // input RGBA
uniform texture2d alphamask; // alpha mask
uniform int blurSize; // Size of the image blur kernel. 0 = no blur
uniform float xTexelSize; // Size of texel in X coord
uniform float yTexelSize; // Size of texel in Y coord
uniform texture2d blurredBackground; // input RGBA

sampler_state textureSampler {
@@ -33,14 +30,8 @@ VertDataOut VSDefault(VertDataIn v_in)

float4 PSAlphaMaskRGBAWithBlur(VertDataOut v_in) : TARGET
{
float4 inputRGBA = image.Sample(textureSampler, v_in.uv);
inputRGBA.rgb = max(float3(0.0, 0.0, 0.0), inputRGBA.rgb / inputRGBA.a);

float4 outputRGBA;
float a = (1.0 - alphamask.Sample(textureSampler, v_in.uv).r) * inputRGBA.a;
outputRGBA.rgb = inputRGBA.rgb * a + blurredBackground.Sample(textureSampler, v_in.uv).rgb * (1.0 - a);
outputRGBA.a = 1;
return outputRGBA;
// Return the blurred image; the focal mask was already applied during blurring
return float4(blurredBackground.Sample(textureSampler, v_in.uv).rgb, 1.0);
}

float4 PSAlphaMaskRGBAWithoutBlur(VertDataOut v_in) : TARGET
2 changes: 2 additions & 0 deletions data/locale/en-US.ini
@@ -28,3 +28,5 @@ URETINEX="URetinex-Net"
SGLLIE="Semantic Guided Enhancement"
ZERODCE="Zero-DCE"
EnableThreshold="Enable threshold"
BlurFocusPoint="Blur focus point"
TCMonoDepth="TCMonoDepth (Depth)"
Binary file added data/models/tcmonodepth_tcsmallnet_192x320.onnx
79 changes: 53 additions & 26 deletions src/background-filter.cpp
@@ -21,6 +21,7 @@
#include "models/ModelSelfie.h"
#include "models/ModelRVM.h"
#include "models/ModelPPHumanSeg.h"
#include "models/ModelTCMonoDepth.h"
#include "FilterData.h"
#include "ort-utils/ort-session-utils.h"
#include "obs-utils/obs-utils.h"
@@ -38,6 +39,7 @@ struct background_removal_filter : public filter_data {
int maskEveryXFrames = 1;
int maskEveryXFramesCount = 0;
int64_t blurBackground = 0;
float blurFocusPoint = 0.1f;

gs_effect_t *effect;
gs_effect_t *kawaseBlurEffect;
@@ -71,6 +73,7 @@ obs_properties_t *background_filter_properties(void *data)
{
obs_properties_t *props = obs_properties_create();

/* Threshold props */
obs_property_t *p = obs_properties_add_bool(
props, "enable_threshold", obs_module_text("EnableThreshold"));
obs_property_set_modified_callback(p, enable_threshold_modified);
@@ -92,6 +95,7 @@
props, "feather", obs_module_text("FeatherBlendSilhouette"),
0.0, 1.0, 0.05);

/* GPU, CPU, and performance props */
obs_property_t *p_use_gpu = obs_properties_add_list(
props, "useGPU", obs_module_text("InferenceDevice"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
@@ -111,6 +115,13 @@
USEGPU_COREML);
#endif

obs_properties_add_int(props, "mask_every_x_frames",
obs_module_text("CalculateMaskEveryXFrame"), 1,
300, 1);
obs_properties_add_int_slider(props, "numThreads",
obs_module_text("NumThreads"), 0, 8, 1);

/* Model selection props */
obs_property_t *p_model_select = obs_properties_add_list(
props, "model_select", obs_module_text("SegmentationModel"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
@@ -128,17 +139,19 @@
obs_property_list_add_string(p_model_select,
obs_module_text("Robust Video Matting"),
MODEL_RVM);
obs_property_list_add_string(p_model_select,
obs_module_text("TCMonoDepth"),
MODEL_DEPTH_TCMONODEPTH);

obs_properties_add_int(props, "mask_every_x_frames",
obs_module_text("CalculateMaskEveryXFrame"), 1,
300, 1);

/* Background blur props */
obs_properties_add_int_slider(
props, "blur_background",
obs_module_text("BlurBackgroundFactor0NoBlurUseColor"), 0, 20,
1);
obs_properties_add_int_slider(props, "numThreads",
obs_module_text("NumThreads"), 0, 8, 1);

obs_properties_add_float_slider(props, "blur_focus_point",
obs_module_text("BlurFocusPoint"), 0.0,
1.0, 0.05);

UNUSED_PARAMETER(data);
return props;
@@ -163,6 +176,7 @@
obs_data_set_default_int(settings, "mask_every_x_frames", 1);
obs_data_set_default_int(settings, "blur_background", 0);
obs_data_set_default_int(settings, "numThreads", 1);
obs_data_set_default_double(settings, "blur_focus_point", 0.1);
}

void background_filter_update(void *data, obs_data_t *settings)
@@ -182,6 +196,8 @@
(int)obs_data_get_int(settings, "mask_every_x_frames");
tf->maskEveryXFramesCount = (int)(0);
tf->blurBackground = obs_data_get_int(settings, "blur_background");
tf->blurFocusPoint =
(float)obs_data_get_double(settings, "blur_focus_point");

const std::string newUseGpu = obs_data_get_string(settings, "useGPU");
const std::string newModel =
@@ -211,6 +227,9 @@
if (tf->modelSelection == MODEL_PPHUMANSEG) {
tf->model.reset(new ModelPPHumanSeg);
}
if (tf->modelSelection == MODEL_DEPTH_TCMONODEPTH) {
tf->model.reset(new ModelTCMonoDepth);
}

createOrtSession(tf);
}
@@ -433,7 +452,8 @@
}

static gs_texture_t *blur_background(struct background_removal_filter *tf,
uint32_t width, uint32_t height)
uint32_t width, uint32_t height,
gs_texture_t *alphaTexture)
{
if (tf->blurBackground == 0 || !tf->kawaseBlurEffect) {
return nullptr;
@@ -444,10 +464,18 @@
gs_texrender_get_texture(tf->texrender));
gs_eparam_t *image =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "image");
gs_eparam_t *focalmask =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "focalmask");
gs_eparam_t *xOffset =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "xOffset");
gs_eparam_t *yOffset =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "yOffset");
gs_eparam_t *blurIter =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "blurIter");
gs_eparam_t *blurTotal =
gs_effect_get_param_by_name(tf->kawaseBlurEffect, "blurTotal");
gs_eparam_t *blurFocusPointParam = gs_effect_get_param_by_name(
tf->kawaseBlurEffect, "blurFocusPoint");

for (int i = 0; i < (int)tf->blurBackground; i++) {
gs_texrender_reset(tf->texrender);
@@ -458,8 +486,12 @@
}

gs_effect_set_texture(image, blurredTexture);
gs_effect_set_texture(focalmask, alphaTexture);
gs_effect_set_float(xOffset, ((float)i + 0.5f) / (float)width);
gs_effect_set_float(yOffset, ((float)i + 0.5f) / (float)height);
gs_effect_set_int(blurIter, i);
gs_effect_set_int(blurTotal, (int)tf->blurBackground);
gs_effect_set_float(blurFocusPointParam, tf->blurFocusPoint);

struct vec4 background;
vec4_zero(&background);
@@ -492,22 +524,12 @@
return;
}

// Output the masked image

gs_texture_t *blurredTexture = blur_background(tf, width, height);

if (!tf->effect) {
// Effect failed to load, skip rendering
obs_source_skip_video_filter(tf->source);
return;
}

if (!obs_source_process_filter_begin(tf->source, GS_RGBA,
OBS_ALLOW_DIRECT_RENDERING)) {
obs_source_skip_video_filter(tf->source);
return;
}

gs_texture_t *alphaTexture = nullptr;
{
std::lock_guard<std::mutex> lock(tf->outputLock);
@@ -520,21 +542,26 @@
return;
}
}

// Output the masked image
gs_texture_t *blurredTexture =
blur_background(tf, width, height, alphaTexture);

if (!obs_source_process_filter_begin(tf->source, GS_RGBA,
OBS_ALLOW_DIRECT_RENDERING)) {
obs_source_skip_video_filter(tf->source);
gs_texture_destroy(alphaTexture);
gs_texture_destroy(blurredTexture);
return;
}

gs_eparam_t *alphamask =
gs_effect_get_param_by_name(tf->effect, "alphamask");
gs_eparam_t *blurSize =
gs_effect_get_param_by_name(tf->effect, "blurSize");
gs_eparam_t *xTexelSize =
gs_effect_get_param_by_name(tf->effect, "xTexelSize");
gs_eparam_t *yTexelSize =
gs_effect_get_param_by_name(tf->effect, "yTexelSize");
gs_eparam_t *blurredBackground =
gs_effect_get_param_by_name(tf->effect, "blurredBackground");

gs_effect_set_texture(alphamask, alphaTexture);
gs_effect_set_int(blurSize, (int)tf->blurBackground);
gs_effect_set_float(xTexelSize, 1.0f / (float)width);
gs_effect_set_float(yTexelSize, 1.0f / (float)height);

if (tf->blurBackground > 0) {
gs_effect_set_texture(blurredBackground, blurredTexture);
}
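For context on the loop above: each Kawase pass reads the previous pass's output and samples (i + 0.5) texels away, so a handful of cheap passes compounds into a much larger effective kernel. Here is a standalone sketch of the offset schedule `blur_background()` feeds the effect (the frame size is an assumption for illustration):

```cpp
#include <cstdio>

int main()
{
	const float width = 1280.0f, height = 720.0f; // assumed frame size
	const int blurTotal = 5; // stands in for tf->blurBackground

	// Same offsets blur_background() sets on each iteration
	for (int i = 0; i < blurTotal; i++) {
		float xOffset = ((float)i + 0.5f) / width;
		float yOffset = ((float)i + 0.5f) / height;
		std::printf("pass %d: xOffset=%.6f yOffset=%.6f\n", i,
			    xOffset, yOffset);
	}
}
```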
2 changes: 2 additions & 0 deletions src/consts.h
@@ -11,6 +11,8 @@ const char *const MODEL_ENHANCE_URETINEX = "models/uretinex_net_180x320.onnx";
const char *const MODEL_ENHANCE_SGLLIE =
"models/semantic_guided_llie_180x324.onnx";
const char *const MODEL_ENHANCE_ZERODCE = "models/zero_dce_180x320.onnx";
const char *const MODEL_DEPTH_TCMONODEPTH =
"models/tcmonodepth_tcsmallnet_192x320.onnx";

const char *const USEGPU_CPU = "cpu";
const char *const USEGPU_DML = "dml";
28 changes: 28 additions & 0 deletions src/models/ModelTCMonoDepth.h
@@ -0,0 +1,28 @@
#ifndef MODELTCMONODEPTH_H
#define MODELTCMONODEPTH_H

#include "Model.h"

class ModelTCMonoDepth : public ModelBCHW {
private:
/* data */
public:
ModelTCMonoDepth(/* args */) {}
~ModelTCMonoDepth() {}

virtual void prepareInputToNetwork(cv::Mat &resizedImage,
cv::Mat &preprocessedImage)
{
// Do not normalize from [0, 255] to [0, 1].

hwc_to_chw(resizedImage, preprocessedImage);
}

virtual void postprocessOutput(cv::Mat &outputImage)
{
cv::normalize(outputImage, outputImage, 1.0, 0.0,
cv::NORM_MINMAX);
}
};

#endif // MODELTCMONODEPTH_H
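A standalone sketch of what `postprocessOutput` does to the raw network output: `cv::NORM_MINMAX` with bounds 1.0/0.0 rescales an arbitrary-range relative-depth map into [0, 1], the range the focal mask expects (the 2x2 values below are made up for illustration):

```cpp
#include <cstdio>
#include <opencv2/core.hpp>

int main()
{
	// Fabricated raw depth values with an arbitrary range
	cv::Mat depth = (cv::Mat_<float>(2, 2) << 3.2f, 7.9f, 5.0f, 12.4f);

	// Same call as ModelTCMonoDepth::postprocessOutput():
	// the minimum maps to 0 and the maximum to 1
	cv::normalize(depth, depth, 1.0, 0.0, cv::NORM_MINMAX);

	for (int r = 0; r < depth.rows; r++)
		for (int c = 0; c < depth.cols; c++)
			std::printf("%.3f ", depth.at<float>(r, c));
	std::printf("\n"); // prints: 0.000 0.511 0.196 1.000
}
```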
