Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transmit url analytics correctly for rerun.io domains #6322

Merged
merged 2 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4122,6 +4122,7 @@ dependencies = [
"sha2",
"thiserror",
"time",
"url",
"uuid",
"web-sys",
]
Expand Down
1 change: 1 addition & 0 deletions crates/re_analytics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ serde_json.workspace = true
sha2.workspace = true
thiserror.workspace = true
time = { workspace = true, features = ["serde", "formatting", "parsing"] }
url = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4", "js"] }

[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
Expand Down
81 changes: 77 additions & 4 deletions crates/re_analytics/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ pub struct Identify {
pub struct ViewerStarted {
/// The URL on which the web viewer is running.
///
/// We _only_ collect this on `rerun.io` domains.
/// This will be used to populate `hashed_root_domain` property for all urls.
/// This will also populate `rerun_url` property if the url root domain is `rerun.io`.
pub url: Option<String>,

/// The environment in which the viewer is running.
Expand All @@ -61,7 +62,8 @@ pub struct ViewerStarted {
pub struct OpenRecording {
/// The URL on which the web viewer is running.
///
/// We _only_ collect this on `rerun.io` domains.
/// This will be used to populate `hashed_root_domain` property for all urls.
/// This will also populate `rerun_url` property if the url root domain is `rerun.io`.
pub url: Option<String>,

/// The environment in which the viewer is running.
Expand Down Expand Up @@ -125,6 +127,7 @@ impl Properties for ServeWasm {
use std::collections::HashMap;

use re_build_info::BuildInfo;
use url::Url;

use crate::{AnalyticsEvent, Event, EventKind, Properties, Property};

Expand Down Expand Up @@ -167,12 +170,40 @@ impl Event for ViewerStarted {
const NAME: &'static str = "viewer_started";
}

const RERUN_DOMAINS: [&str; 1] = ["rerun.io"];

/// Given a URL, extract the root domain.
fn extract_root_domain(url: &str) -> Option<String> {
let parsed = Url::parse(url).ok()?;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Side note: I really dislike how there's no borrowed version of Url

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ha. Yeah, I was trying for a bit to implement the root-domain extraction as a slice before I realized Url wasn't borrowed.

let domain = parsed.domain()?;
let parts = domain.split('.').collect::<Vec<_>>();
if parts.len() >= 2 {
Some(parts[parts.len() - 2..].join("."))
} else {
None
}
}

fn add_sanitized_url_properties(event: &mut AnalyticsEvent, url: Option<String>) {
let Some(root_domain) = url.as_ref().and_then(|url| extract_root_domain(url)) else {
return;
};

if RERUN_DOMAINS.contains(&root_domain.as_str()) {
event.insert_opt("rerun_url", url);
}

let hashed = Property::from(root_domain).hashed();
event.insert("hashed_root_domain", hashed);
}

impl Properties for ViewerStarted {
fn serialize(self, event: &mut AnalyticsEvent) {
let Self { url, app_env } = self;

event.insert("app_env", app_env);
event.insert_opt("url", url);

add_sanitized_url_properties(event, url);
}
}

Expand All @@ -189,7 +220,8 @@ impl Properties for OpenRecording {
data_source,
} = self;

event.insert_opt("url", url);
add_sanitized_url_properties(event, url);

event.insert("app_env", app_env);

if let Some(store_info) = store_info {
Expand Down Expand Up @@ -255,3 +287,44 @@ impl Properties for CrashSignal {
event.insert("callstack", callstack.clone());
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_root_domain() {
// Valid urls
assert_eq!(
extract_root_domain("https://rerun.io"),
Some("rerun.io".to_owned())
);
assert_eq!(
extract_root_domain("https://ReRun.io"),
Some("rerun.io".to_owned())
);
assert_eq!(
extract_root_domain("http://app.rerun.io"),
Some("rerun.io".to_owned())
);
assert_eq!(
extract_root_domain("https://www.rerun.io/viewer?url=https://app.rerun.io/version/0.15.1/examples/detect_and_track_objects.rrd"),
Some("rerun.io".to_owned())
);

// Local domains
assert_eq!(
extract_root_domain("http://localhost:9090/?url=ws://localhost:9877"),
None
);
assert_eq!(
extract_root_domain("http://127.0.0.1:9090/?url=ws://localhost:9877"),
None
);

// Invalid urls
assert_eq!(extract_root_domain("rerun.io"), None);
assert_eq!(extract_root_domain("https:/rerun"), None);
assert_eq!(extract_root_domain("https://rerun"), None);
}
}
1 change: 1 addition & 0 deletions lychee.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ exclude_path = [
"venv",

# Actually ignored files beyond .gitignore
"crates/re_analytics/src/event.rs", # Contains test with malformed urls
"scripts/lint.py", # Contains url-matching regexes that aren't actual urls
"scripts/screenshot_compare/assets/templates/",
]
Expand Down
Loading