Skip to content

Commit

Permalink
Transmit url analytics correctly for rerun.io domains (#6322)
Browse files Browse the repository at this point in the history
### What
Resolves:
- #6274
- #6288

I added the processing at the last stage of the analytics output so we
don't depend on every code path possibily populating the analytics event
to do its own domain evaluation and/or hashing.

Validated events look correct in posthog:

![image](https://github.com/rerun-io/rerun/assets/3312232/17ddf585-5135-4ad1-8132-f64661b9e730)


### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using examples from latest `main` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/6322?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/6322?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!

- [PR Build Summary](https://build.rerun.io/pr/6322)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

To run all checks from `main`, comment on the PR with `@rerun-bot
full-check`.
  • Loading branch information
jleibs authored May 14, 2024
1 parent 401c853 commit 178ef60
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 4 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4122,6 +4122,7 @@ dependencies = [
"sha2",
"thiserror",
"time",
"url",
"uuid",
"web-sys",
]
Expand Down
1 change: 1 addition & 0 deletions crates/re_analytics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ serde_json.workspace = true
sha2.workspace = true
thiserror.workspace = true
time = { workspace = true, features = ["serde", "formatting", "parsing"] }
url = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4", "js"] }

[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
Expand Down
81 changes: 77 additions & 4 deletions crates/re_analytics/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ pub struct Identify {
pub struct ViewerStarted {
/// The URL on which the web viewer is running.
///
/// We _only_ collect this on `rerun.io` domains.
/// This will be used to populate `hashed_root_domain` property for all urls.
/// This will also populate `rerun_url` property if the url root domain is `rerun.io`.
pub url: Option<String>,

/// The environment in which the viewer is running.
Expand All @@ -61,7 +62,8 @@ pub struct ViewerStarted {
pub struct OpenRecording {
/// The URL on which the web viewer is running.
///
/// We _only_ collect this on `rerun.io` domains.
/// This will be used to populate `hashed_root_domain` property for all urls.
/// This will also populate `rerun_url` property if the url root domain is `rerun.io`.
pub url: Option<String>,

/// The environment in which the viewer is running.
Expand Down Expand Up @@ -125,6 +127,7 @@ impl Properties for ServeWasm {
use std::collections::HashMap;

use re_build_info::BuildInfo;
use url::Url;

use crate::{AnalyticsEvent, Event, EventKind, Properties, Property};

Expand Down Expand Up @@ -167,12 +170,40 @@ impl Event for ViewerStarted {
const NAME: &'static str = "viewer_started";
}

const RERUN_DOMAINS: [&str; 1] = ["rerun.io"];

/// Given a URL, extract the root domain.
fn extract_root_domain(url: &str) -> Option<String> {
let parsed = Url::parse(url).ok()?;
let domain = parsed.domain()?;
let parts = domain.split('.').collect::<Vec<_>>();
if parts.len() >= 2 {
Some(parts[parts.len() - 2..].join("."))
} else {
None
}
}

fn add_sanitized_url_properties(event: &mut AnalyticsEvent, url: Option<String>) {
let Some(root_domain) = url.as_ref().and_then(|url| extract_root_domain(url)) else {
return;
};

if RERUN_DOMAINS.contains(&root_domain.as_str()) {
event.insert_opt("rerun_url", url);
}

let hashed = Property::from(root_domain).hashed();
event.insert("hashed_root_domain", hashed);
}

impl Properties for ViewerStarted {
fn serialize(self, event: &mut AnalyticsEvent) {
let Self { url, app_env } = self;

event.insert("app_env", app_env);
event.insert_opt("url", url);

add_sanitized_url_properties(event, url);
}
}

Expand All @@ -189,7 +220,8 @@ impl Properties for OpenRecording {
data_source,
} = self;

event.insert_opt("url", url);
add_sanitized_url_properties(event, url);

event.insert("app_env", app_env);

if let Some(store_info) = store_info {
Expand Down Expand Up @@ -255,3 +287,44 @@ impl Properties for CrashSignal {
event.insert("callstack", callstack.clone());
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_root_domain() {
// Valid urls
assert_eq!(
extract_root_domain("https://rerun.io"),
Some("rerun.io".to_owned())
);
assert_eq!(
extract_root_domain("https://ReRun.io"),
Some("rerun.io".to_owned())
);
assert_eq!(
extract_root_domain("http://app.rerun.io"),
Some("rerun.io".to_owned())
);
assert_eq!(
extract_root_domain("https://www.rerun.io/viewer?url=https://app.rerun.io/version/0.15.1/examples/detect_and_track_objects.rrd"),
Some("rerun.io".to_owned())
);

// Local domains
assert_eq!(
extract_root_domain("http://localhost:9090/?url=ws://localhost:9877"),
None
);
assert_eq!(
extract_root_domain("http://127.0.0.1:9090/?url=ws://localhost:9877"),
None
);

// Invalid urls
assert_eq!(extract_root_domain("rerun.io"), None);
assert_eq!(extract_root_domain("https:/rerun"), None);
assert_eq!(extract_root_domain("https://rerun"), None);
}
}
1 change: 1 addition & 0 deletions lychee.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ exclude_path = [
"venv",

# Actually ignored files beyond .gitignore
"crates/re_analytics/src/event.rs", # Contains test with malformed urls
"scripts/lint.py", # Contains url-matching regexes that aren't actual urls
"scripts/screenshot_compare/assets/templates/",
]
Expand Down

0 comments on commit 178ef60

Please sign in to comment.