From ef13c803d8accf2d3b6c8d9ba17a4e2da52e5978 Mon Sep 17 00:00:00 2001
From: Gijs de Jong <14833076+oxkitsune@users.noreply.github.com>
Date: Tue, 22 Oct 2024 11:16:35 +0200
Subject: [PATCH] Add ml_depth_pro example (#7832)

### What


https://github.com/user-attachments/assets/b7adac6a-60e4-4a1c-a27b-34006b87713a

This adds an external example for visualizing DepthPro using the new
video logging API.

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG

- [PR Build Summary](https://build.rerun.io/pr/7832)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

To run all checks from `main`, comment on the PR with `@rerun-bot
full-check`.
---
 examples/manifest.toml                 |  1 +
 examples/python/ml_depth_pro/README.md | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 examples/python/ml_depth_pro/README.md

diff --git a/examples/manifest.toml b/examples/manifest.toml
index a29d3d8aa171..992c12dd90f8 100644
--- a/examples/manifest.toml
+++ b/examples/manifest.toml
@@ -84,6 +84,7 @@ examples = [
   "live_camera_edge_detection",
   "segment_anything_model",
   "tapir",
+  "ml_depth_pro",
 ]
 
 [categories.3d-reconstruction] # NOLINT
diff --git a/examples/python/ml_depth_pro/README.md b/examples/python/ml_depth_pro/README.md
new file mode 100644
index 000000000000..65fdf482fe64
--- /dev/null
+++ b/examples/python/ml_depth_pro/README.md
@@ -0,0 +1,26 @@
+<!--[metadata]
+title = "DepthPro"
+tags = ["2D", "3D", "HuggingFace", "Pinhole camera", "Depth"]
+source = "https://github.com/rerun-io/hf-example-ml-depth-pro"
+thumbnail = "https://static.rerun.io/ml_depth_pro/e29c5afc5e4d4a36656abe0e4559a952a5a2fa68/480w.png"
+thumbnail_dimensions = [480, 294]
+-->
+
+This example visualizes the paper "Depth Pro: Sharp Monocular Metric Depth in Less Than a Second" ([arXiv](https://arxiv.org/abs/2410.02073)).
+The example runs inference for each frame in the provided video, and logs the predicted depth map to Rerun.
+
+## Background
+
+DepthPro is a fast, zero-shot monocular depth estimation model developed by Apple.
+It produces highly detailed and sharp depth maps at 2.25 megapixels in just 0.3 seconds on a standard GPU.
+The model works using a multi-scale vision transformer architecture that captures both global context and fine-grained details, enabling it to
+accurately predict metric depth _without_ requiring camera intrinsics such as focal length or principal point.
+Additionally, the model is able to predict the focal length of the camera used to take the photo, which is also visualized in this example.
+
+This example uses the open-source code and [model weights](https://huggingface.co/apple/DepthPro) provided by the authors.
+
+## Run the code
+
+This is an external example. Check the [repository](https://github.com/rerun-io/hf-example-ml-depth-pro) for more information.
+
+You can try the example on a HuggingFace space [here](https://huggingface.co/spaces/oxkitsune/rerun-ml-depth-pro).