From 83f48c08a4a6be79ebadf0cb3c68b1371ccb17b8 Mon Sep 17 00:00:00 2001 From: Aleh Zasypkin Date: Mon, 20 Nov 2023 00:07:15 +0100 Subject: [PATCH] feat(web-scraping): add support for custom HTTP request headers in web page trackers --- src/scheduler.rs | 2 + .../web_page_trackers_fetch_job.rs | 26 ++++++++++ .../web_page_trackers_schedule_job.rs | 7 +++ .../web_page_trackers_trigger_job.rs | 5 ++ src/utils/web_scraping.rs | 17 +++++++ src/utils/web_scraping/api_ext.rs | 49 +++++++++++++++++++ .../api_ext/web_page_tracker_create_params.rs | 9 ++++ .../api_ext/web_page_tracker_update_params.rs | 8 +++ .../database_ext/raw_web_page_tracker.rs | 25 ++++++++-- .../web_scraper_content_request.rs | 26 +++++++++- .../web_scraper_resources_request.rs | 26 +++++++++- .../web_page_tracker_settings.rs | 29 +++++++++++ 12 files changed, 221 insertions(+), 8 deletions(-) diff --git a/src/scheduler.rs b/src/scheduler.rs index 5c980db..3963833 100644 --- a/src/scheduler.rs +++ b/src/scheduler.rs @@ -215,6 +215,7 @@ mod tests { schedule: Some("1 2 3 4 5 6 2030".to_string()), delay: Default::default(), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, }, @@ -232,6 +233,7 @@ mod tests { schedule: Some("1 2 3 4 5 6 2030".to_string()), delay: Default::default(), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, }, diff --git a/src/scheduler/scheduler_jobs/web_page_trackers_fetch_job.rs b/src/scheduler/scheduler_jobs/web_page_trackers_fetch_job.rs index 14923c5..8c9de35 100644 --- a/src/scheduler/scheduler_jobs/web_page_trackers_fetch_job.rs +++ b/src/scheduler/scheduler_jobs/web_page_trackers_fetch_job.rs @@ -494,6 +494,7 @@ mod tests { schedule: Some("0 0 * * * *".to_string()), delay: Default::default(), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, }, @@ -525,6 +526,7 @@ mod tests { schedule: Some("0 0 * * * *".to_string()), delay: Default::default(), scripts: 
Default::default(), + headers: Default::default(), enable_notifications: true, }, }, @@ -610,6 +612,11 @@ mod tests { .into_iter() .collect(), ), + headers: Some( + [("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect(), + ), enable_notifications: true, }, user_id: user.id, @@ -660,6 +667,11 @@ mod tests { .set_scripts(WebScraperResourcesRequestScripts { resource_filter_map: Some("return resource;"), }) + .set_headers( + &[("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect(), + ) .set_delay(Duration::from_millis(2000)), ) .unwrap(), @@ -778,6 +790,11 @@ mod tests { .into_iter() .collect(), ), + headers: Some( + [("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect(), + ), enable_notifications: false, }, user_id: user.id, @@ -815,6 +832,11 @@ mod tests { .set_scripts(WebScraperContentRequestScripts { extract_content: Some("return document.body.innerText;"), }) + .set_headers( + &[("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect(), + ) .set_delay(Duration::from_millis(2000)), ) .unwrap(), @@ -918,6 +940,7 @@ mod tests { schedule: Some(tracker_schedule), delay: Duration::from_secs(2), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, user_id: user.id, @@ -1099,6 +1122,7 @@ mod tests { schedule: Some(tracker_schedule), delay: Duration::from_secs(2), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, user_id: user.id, @@ -1265,6 +1289,7 @@ mod tests { schedule: Some(tracker_schedule), delay: Duration::from_secs(2), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, user_id: user.id, @@ -1430,6 +1455,7 @@ mod tests { schedule: Some(tracker_schedule), delay: Duration::from_secs(2), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, user_id: user.id, diff --git 
a/src/scheduler/scheduler_jobs/web_page_trackers_schedule_job.rs b/src/scheduler/scheduler_jobs/web_page_trackers_schedule_job.rs index e54b755..d91c84c 100644 --- a/src/scheduler/scheduler_jobs/web_page_trackers_schedule_job.rs +++ b/src/scheduler/scheduler_jobs/web_page_trackers_schedule_job.rs @@ -262,6 +262,7 @@ mod tests { enable_notifications: true, schedule: Some("1 2 3 4 5 6 2030".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -279,6 +280,7 @@ mod tests { enable_notifications: true, schedule: Some("1 2 3 4 5 6 2035".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -296,6 +298,7 @@ mod tests { enable_notifications: true, schedule: Some("1 2 3 4 5 6 2040".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -437,6 +440,7 @@ mod tests { enable_notifications: true, schedule: None, scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -455,6 +459,7 @@ mod tests { enable_notifications: true, schedule: None, scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -534,6 +539,7 @@ mod tests { enable_notifications: true, schedule: Some("1 2 3 4 5 6 2030".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -551,6 +557,7 @@ mod tests { enable_notifications: true, schedule: Some("1 2 3 4 5 6 2030".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) diff --git a/src/scheduler/scheduler_jobs/web_page_trackers_trigger_job.rs b/src/scheduler/scheduler_jobs/web_page_trackers_trigger_job.rs index 4cf82a9..2e83c02 100644 --- a/src/scheduler/scheduler_jobs/web_page_trackers_trigger_job.rs +++ b/src/scheduler/scheduler_jobs/web_page_trackers_trigger_job.rs @@ -226,6 +226,7 @@ mod tests { schedule: Some("0 0 * * * *".to_string()), delay: Default::default(), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, }, @@ -308,6 +309,7 @@ mod tests { 
schedule: Some("0 0 * * * *".to_string()), delay: Default::default(), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, }, @@ -390,6 +392,7 @@ mod tests { schedule: Some("1 0 * * * *".to_string()), delay: Default::default(), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, }, @@ -455,6 +458,7 @@ mod tests { schedule: None, delay: Default::default(), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, }, @@ -516,6 +520,7 @@ mod tests { schedule: Some("0 0 * * * *".to_string()), delay: Default::default(), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, }, diff --git a/src/utils/web_scraping.rs b/src/utils/web_scraping.rs index 8d1c294..f68e0f4 100644 --- a/src/utils/web_scraping.rs +++ b/src/utils/web_scraping.rs @@ -167,6 +167,7 @@ pub mod tests { delay: Duration::from_millis(2000), schedule: None, scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, created_at: OffsetDateTime::from_unix_timestamp(946720800)?, @@ -234,6 +235,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -304,6 +306,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -365,6 +368,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), } }))), ) @@ -414,6 +418,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), } }))), ) @@ -462,6 +467,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, 
}, ) @@ -483,6 +489,7 @@ pub mod tests { enable_notifications: false, schedule: Some("0 1 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), } }))), ) @@ -509,6 +516,7 @@ pub mod tests { enable_notifications: false, schedule: Some("0 1 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, created_at: tracker.created_at, meta: None @@ -537,6 +545,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -558,6 +567,7 @@ pub mod tests { enable_notifications: false, schedule: Some("0 1 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), } }))), ) @@ -584,6 +594,7 @@ pub mod tests { enable_notifications: false, schedule: Some("0 1 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, created_at: tracker.created_at, meta: None @@ -612,6 +623,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -658,6 +670,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -705,6 +718,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -722,6 +736,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -851,6 +866,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -867,6 +883,7 @@ pub mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: 
Default::default(), }, }, ) diff --git a/src/utils/web_scraping/api_ext.rs b/src/utils/web_scraping/api_ext.rs index eef3d87..94b20e4 100644 --- a/src/utils/web_scraping/api_ext.rs +++ b/src/utils/web_scraping/api_ext.rs @@ -263,6 +263,12 @@ impl<'a, DR: DnsResolver, ET: EmailTransport> WebScrapingApiExt<'a, DR, ET> { scraper_request }; + let scraper_request = if let Some(headers) = tracker.settings.headers.as_ref() { + scraper_request.set_headers(headers) + } else { + scraper_request + }; + let scraper_response = reqwest::Client::new() .post(format!( "{}api/web_page/resources", @@ -436,6 +442,12 @@ impl<'a, DR: DnsResolver, ET: EmailTransport> WebScrapingApiExt<'a, DR, ET> { scraper_request }; + let scraper_request = if let Some(headers) = tracker.settings.headers.as_ref() { + scraper_request.set_headers(headers) + } else { + scraper_request + }; + let scraper_response = reqwest::Client::new() .post(format!( "{}api/web_page/content", @@ -993,6 +1005,7 @@ mod tests { enable_notifications: true, schedule: None, scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -1028,6 +1041,7 @@ mod tests { enable_notifications: true, schedule: None, scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -1057,6 +1071,7 @@ mod tests { enable_notifications: true, schedule: None, scripts: Default::default(), + headers: Default::default(), }; let url = Url::parse("https://secutils.dev")?; @@ -1224,6 +1239,7 @@ mod tests { enable_notifications: true, schedule: None, scripts: Default::default(), + headers: Default::default(), }; let url = Url::parse("https://secutils.dev")?; @@ -1396,6 +1412,7 @@ mod tests { enable_notifications: true, schedule: None, scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -1503,6 +1520,7 @@ mod tests { enable_notifications: true, schedule: None, scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -1702,6 +1720,7 @@ mod tests { enable_notifications: true, schedule: None, scripts: 
Default::default(), + headers: Default::default(), }, }, ) @@ -1809,6 +1828,7 @@ mod tests { enable_notifications: true, schedule: None, scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2008,6 +2028,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2103,6 +2124,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2198,6 +2220,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2259,6 +2282,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2314,6 +2338,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2366,6 +2391,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2415,6 +2441,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2465,6 +2492,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2645,6 +2673,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2718,6 +2747,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2869,6 +2899,7 @@ mod tests { enable_notifications: true, 
schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -2939,6 +2970,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3008,6 +3040,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3094,6 +3127,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3180,6 +3214,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3267,6 +3302,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3338,6 +3374,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3409,6 +3446,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3482,6 +3520,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3555,6 +3594,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3651,6 +3691,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3744,6 +3785,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: 
Default::default(), }, }, ) @@ -3845,6 +3887,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -3946,6 +3989,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -4047,6 +4091,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -4153,6 +4198,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -4169,6 +4215,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -4315,6 +4362,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) @@ -4413,6 +4461,7 @@ mod tests { enable_notifications: true, schedule: Some("0 0 * * * *".to_string()), scripts: Default::default(), + headers: Default::default(), }, }, ) diff --git a/src/utils/web_scraping/api_ext/web_page_tracker_create_params.rs b/src/utils/web_scraping/api_ext/web_page_tracker_create_params.rs index 260f5fa..b23023d 100644 --- a/src/utils/web_scraping/api_ext/web_page_tracker_create_params.rs +++ b/src/utils/web_scraping/api_ext/web_page_tracker_create_params.rs @@ -46,6 +46,7 @@ mod tests { schedule: None, delay: Duration::from_millis(2000), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }, } @@ -64,6 +65,9 @@ mod tests { "scripts": { "resourceFilterMap": "return resource;" }, + "headers": { + "cookie": "my-cookie" + }, "enableNotifications": true } } @@ -85,6 +89,11 @@ mod tests { .cloned() .collect() ), + headers: Some( + [("cookie".to_string(), 
"my-cookie".to_string())] + .into_iter() + .collect(), + ), enable_notifications: true, }, } diff --git a/src/utils/web_scraping/api_ext/web_page_tracker_update_params.rs b/src/utils/web_scraping/api_ext/web_page_tracker_update_params.rs index e765a8a..cc7a630 100644 --- a/src/utils/web_scraping/api_ext/web_page_tracker_update_params.rs +++ b/src/utils/web_scraping/api_ext/web_page_tracker_update_params.rs @@ -52,6 +52,9 @@ mod tests { "scripts": { "resourceFilterMap": "return resource;" }, + "headers": { + "cookie": "my-cookie" + }, "enableNotifications": true } } @@ -72,6 +75,11 @@ mod tests { .into_iter() .collect() ), + headers: Some( + [("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect(), + ), enable_notifications: true, }), } diff --git a/src/utils/web_scraping/database_ext/raw_web_page_tracker.rs b/src/utils/web_scraping/database_ext/raw_web_page_tracker.rs index 4a833ae..6ada127 100644 --- a/src/utils/web_scraping/database_ext/raw_web_page_tracker.rs +++ b/src/utils/web_scraping/database_ext/raw_web_page_tracker.rs @@ -22,6 +22,7 @@ pub(super) struct RawWebPageTrackerData { pub revisions: usize, pub delay: u64, pub scripts: Option>, + pub headers: Option>, pub enable_notifications: bool, pub meta: Option, } @@ -45,6 +46,7 @@ impl TryFrom for WebPageTracker delay: Duration::from_millis(raw_data.delay), schedule: raw.schedule, scripts: raw_data.scripts, + headers: raw_data.headers, enable_notifications: raw_data.enable_notifications, }, created_at: OffsetDateTime::from_unix_timestamp(raw.created_at)?, @@ -61,6 +63,7 @@ impl TryFrom<&WebPageTracker> for RawWebPageTracker revisions: item.settings.revisions, delay: item.settings.delay.as_millis() as u64, scripts: item.settings.scripts.clone(), + headers: item.settings.headers.clone(), enable_notifications: item.settings.enable_notifications, meta: item.meta.clone(), }; @@ -108,7 +111,7 @@ mod tests { schedule: None, user_id: *mock_user()?.id, job_id: None, - data: vec![1, 0, 0, 0, 0], + 
data: vec![1, 0, 0, 0, 0, 0], // January 1, 2000 10:00:00 created_at: 946720800, })?, @@ -123,6 +126,7 @@ mod tests { schedule: None, delay: Default::default(), scripts: Default::default(), + headers: Default::default(), enable_notifications: false, }, created_at: OffsetDateTime::from_unix_timestamp(946720800)?, @@ -148,7 +152,8 @@ mod tests { data: vec![ 1, 208, 15, 1, 1, 17, 114, 101, 115, 111, 117, 114, 99, 101, 70, 105, 108, 116, 101, 114, 77, 97, 112, 16, 114, 101, 116, 117, 114, 110, 32, 114, 101, 115, - 111, 117, 114, 99, 101, 59, 1, 0 + 111, 117, 114, 99, 101, 59, 1, 1, 6, 99, 111, 111, 107, 105, 101, 9, 109, 121, + 45, 99, 111, 111, 107, 105, 101, 1, 0 ], // January 1, 2000 10:00:00 created_at: 946720800, @@ -171,6 +176,11 @@ mod tests { .into_iter() .collect() ), + headers: Some( + [("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect() + ), enable_notifications: true, }, created_at: OffsetDateTime::from_unix_timestamp(946720800)?, @@ -195,6 +205,7 @@ mod tests { schedule: None, delay: Default::default(), scripts: Default::default(), + headers: Default::default(), enable_notifications: false, }, created_at: OffsetDateTime::from_unix_timestamp(946720800)?, @@ -210,7 +221,7 @@ mod tests { schedule: None, user_id: *mock_user()?.id, job_id: None, - data: vec![1, 0, 0, 0, 0], + data: vec![1, 0, 0, 0, 0, 0], // January 1, 2000 10:00:00 created_at: 946720800, } @@ -235,6 +246,11 @@ mod tests { .into_iter() .collect() ), + headers: Some( + [("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect() + ), enable_notifications: true, }, created_at: OffsetDateTime::from_unix_timestamp(946720800)?, @@ -257,7 +273,8 @@ mod tests { data: vec![ 1, 208, 15, 1, 1, 17, 114, 101, 115, 111, 117, 114, 99, 101, 70, 105, 108, 116, 101, 114, 77, 97, 112, 16, 114, 101, 116, 117, 114, 110, 32, 114, 101, 115, - 111, 117, 114, 99, 101, 59, 1, 0 + 111, 117, 114, 99, 101, 59, 1, 1, 6, 99, 111, 111, 107, 105, 101, 9, 109, 121, + 45, 99, 111, 111, 
107, 105, 101, 1, 0 ], // January 1, 2000 10:00:00 created_at: 946720800, diff --git a/src/utils/web_scraping/web_page_trackers/web_page_content/web_scraper_content_request.rs b/src/utils/web_scraping/web_page_trackers/web_page_content/web_scraper_content_request.rs index c2231f3..91de63d 100644 --- a/src/utils/web_scraping/web_page_trackers/web_page_content/web_scraper_content_request.rs +++ b/src/utils/web_scraping/web_page_trackers/web_page_content/web_scraper_content_request.rs @@ -1,5 +1,5 @@ use serde::Serialize; -use std::time::Duration; +use std::{collections::HashMap, time::Duration}; use url::Url; /// Scripts to inject into the web page before extracting content to track. @@ -44,6 +44,10 @@ pub struct WebScraperContentRequest<'a> { /// Optional scripts to inject into the web page before extracting content. #[serde(skip_serializing_if = "WebScraperContentRequestScripts::is_empty")] pub scripts: WebScraperContentRequestScripts<'a>, + + /// Optional HTTP headers to send with every request to the tracked web page. + #[serde(skip_serializing_if = "Option::is_none")] + pub headers: Option<&'a HashMap>, } impl<'a> WebScraperContentRequest<'a> { @@ -57,6 +61,7 @@ impl<'a> WebScraperContentRequest<'a> { wait_selector: None, previous_content: None, scripts: Default::default(), + headers: None, } } @@ -80,6 +85,14 @@ impl<'a> WebScraperContentRequest<'a> { pub fn set_scripts(self, scripts: WebScraperContentRequestScripts<'a>) -> Self { Self { scripts, ..self } } + + /// Sets headers to attach to every request to the tracked web page. 
+ pub fn set_headers(self, headers: &'a HashMap) -> Self { + Self { + headers: Some(headers), + ..self + } + } } #[cfg(test)] @@ -98,7 +111,12 @@ mod tests { previous_content: Some("some content"), scripts: WebScraperContentRequestScripts { extract_content: Some("return resource;") - } + }, + headers: Some( + &[("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect(), + ), }, @r###" { "url": "http://localhost:1234/my/app?q=2", @@ -108,6 +126,9 @@ mod tests { "previousContent": "some content", "scripts": { "extractContent": "return resource;" + }, + "headers": { + "cookie": "my-cookie" } } "###); @@ -143,6 +164,7 @@ mod tests { assert!(request.delay.is_none()); assert!(request.timeout.is_none()); assert!(request.scripts.is_empty()); + assert!(request.headers.is_none()); Ok(()) } diff --git a/src/utils/web_scraping/web_page_trackers/web_page_resources/web_scraper_resources_request.rs b/src/utils/web_scraping/web_page_trackers/web_page_resources/web_scraper_resources_request.rs index 8dfed1a..15c5430 100644 --- a/src/utils/web_scraping/web_page_trackers/web_page_resources/web_scraper_resources_request.rs +++ b/src/utils/web_scraping/web_page_trackers/web_page_resources/web_scraper_resources_request.rs @@ -1,5 +1,5 @@ use serde::Serialize; -use std::time::Duration; +use std::{collections::HashMap, time::Duration}; use url::Url; /// Scripts to inject into the web page before extracting resources to track. @@ -40,6 +40,10 @@ pub struct WebScraperResourcesRequest<'a> { /// Optional scripts to inject into the web page before extracting resources to track.. #[serde(skip_serializing_if = "WebScraperResourcesRequestScripts::is_empty")] pub scripts: WebScraperResourcesRequestScripts<'a>, + + /// Optional HTTP headers to send with every request to the tracked web page. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub headers: Option<&'a HashMap>, } impl<'a> WebScraperResourcesRequest<'a> { @@ -52,6 +56,7 @@ impl<'a> WebScraperResourcesRequest<'a> { delay: None, wait_selector: None, scripts: Default::default(), + headers: None, } } @@ -67,6 +72,14 @@ impl<'a> WebScraperResourcesRequest<'a> { pub fn set_scripts(self, scripts: WebScraperResourcesRequestScripts<'a>) -> Self { Self { scripts, ..self } } + + /// Sets headers to attach to every request to the tracked web page. + pub fn set_headers(self, headers: &'a HashMap) -> Self { + Self { + headers: Some(headers), + ..self + } + } } #[cfg(test)] @@ -84,7 +97,12 @@ mod tests { wait_selector: Some("body"), scripts: WebScraperResourcesRequestScripts { resource_filter_map: Some("return resource;") - } + }, + headers: Some( + &[("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect(), + ), }, @r###" { "url": "http://localhost:1234/my/app?q=2", @@ -93,6 +111,9 @@ mod tests { "waitSelector": "body", "scripts": { "resourceFilterMap": "return resource;" + }, + "headers": { + "cookie": "my-cookie" } } "###); @@ -127,6 +148,7 @@ mod tests { assert!(request.delay.is_none()); assert!(request.timeout.is_none()); assert!(request.scripts.is_empty()); + assert!(request.headers.is_none()); Ok(()) } diff --git a/src/utils/web_scraping/web_page_trackers/web_page_tracker_settings.rs b/src/utils/web_scraping/web_page_trackers/web_page_tracker_settings.rs index a37599b..bcff211 100644 --- a/src/utils/web_scraping/web_page_trackers/web_page_tracker_settings.rs +++ b/src/utils/web_scraping/web_page_trackers/web_page_tracker_settings.rs @@ -23,6 +23,9 @@ pub struct WebPageTrackerSettings { /// Optional scripts to inject into the tracked web page. #[serde(skip_serializing_if = "Option::is_none")] pub scripts: Option>, + /// Optional list of HTTP headers that should be sent with the tracker requests. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub headers: Option>, /// Indicates that web page change notifications are enabled for this tracker. pub enable_notifications: bool, } @@ -43,6 +46,7 @@ mod tests { schedule: None, delay: Duration::from_millis(2500), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }; assert_json_snapshot!(settings, @r###" @@ -58,6 +62,7 @@ mod tests { schedule: Some("0 0 * * *".to_string()), delay: Duration::from_millis(2500), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }; assert_json_snapshot!(settings, @r###" @@ -81,6 +86,11 @@ mod tests { .into_iter() .collect(), ), + headers: Some( + [("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect(), + ), enable_notifications: true, }; assert_json_snapshot!(settings, @r###" @@ -91,6 +101,9 @@ mod tests { "scripts": { "resourceFilterMap": "return resource;" }, + "headers": { + "cookie": "my-cookie" + }, "enableNotifications": true } "###); @@ -100,6 +113,7 @@ mod tests { schedule: Some("0 0 * * *".to_string()), delay: Duration::from_millis(2500), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }; assert_json_snapshot!(settings, @r###" @@ -116,6 +130,7 @@ mod tests { schedule: Some("0 0 * * *".to_string()), delay: Duration::from_millis(2500), scripts: Default::default(), + headers: Default::default(), enable_notifications: false, }; assert_json_snapshot!(settings, @r###" @@ -137,6 +152,7 @@ mod tests { schedule: None, delay: Duration::from_millis(2000), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }; assert_eq!( @@ -151,6 +167,7 @@ mod tests { schedule: Some("0 0 * * *".to_string()), delay: Duration::from_millis(2000), scripts: Default::default(), + headers: Default::default(), enable_notifications: true, }; assert_eq!( @@ -172,6 +189,11 @@ mod tests { .into_iter() .collect(), ), + headers: Some( + 
[("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect(), + ), enable_notifications: true, }; assert_eq!( @@ -181,6 +203,7 @@ mod tests { "delay": 2000, "schedule": "0 0 * * *", "scripts": { "resourceFilterMap": "return resource;" }, + "headers": { "cookie": "my-cookie" }, "enableNotifications": true }) .to_string() @@ -200,6 +223,11 @@ mod tests { .into_iter() .collect(), ), + headers: Some( + [("cookie".to_string(), "my-cookie".to_string())] + .into_iter() + .collect(), + ), enable_notifications: false, }; assert_eq!( @@ -209,6 +237,7 @@ mod tests { "delay": 2000, "schedule": "0 0 * * *", "scripts": { "resourceFilterMap": "return resource;" }, + "headers": { "cookie": "my-cookie" }, "enableNotifications": false }) .to_string()