diff --git a/assets/templates/tracker_styles.hbs b/assets/templates/tracker_styles.hbs
new file mode 100644
index 0000000..9bde122
--- /dev/null
+++ b/assets/templates/tracker_styles.hbs
@@ -0,0 +1,14 @@
+
diff --git a/assets/templates/web_page_content_tracker_changes_email.hbs b/assets/templates/web_page_content_tracker_changes_email.hbs
index 157e00e..3fb4508 100644
--- a/assets/templates/web_page_content_tracker_changes_email.hbs
+++ b/assets/templates/web_page_content_tracker_changes_email.hbs
@@ -1,28 +1,15 @@
-
-
"{{tracker_name}}" content tracker detected changes
+
"{{tracker_name}}" tracker detected content changes
To learn more, visit the Content trackers page:
Web Scraping → Content trackers
If the button above doesn't work, you can navigate to the following URL directly:
diff --git a/assets/templates/web_page_content_tracker_changes_error_email.hbs b/assets/templates/web_page_content_tracker_changes_error_email.hbs
new file mode 100644
index 0000000..18b2229
--- /dev/null
+++ b/assets/templates/web_page_content_tracker_changes_error_email.hbs
@@ -0,0 +1,21 @@
+
+
+
+
"{{tracker_name}}" tracker failed to check for content changes
+
+
+ {{> email_styles}}
+ {{> tracker_styles}}
+
+
+
+
"{{tracker_name}}" tracker failed to check for content changes
+
There was an error while checking content: {{error_message}}.
+
To check the tracker configuration and re-try, visit the Content trackers page:
+
Web Scraping → Content trackers
+
If the button above doesn't work, you can navigate to the following URL directly:
+
{{back_link}}
+
+
+
+
diff --git a/assets/templates/web_page_resources_tracker_changes_email.hbs b/assets/templates/web_page_resources_tracker_changes_email.hbs
index 8beb3af..7e46a33 100644
--- a/assets/templates/web_page_resources_tracker_changes_email.hbs
+++ b/assets/templates/web_page_resources_tracker_changes_email.hbs
@@ -1,28 +1,15 @@
-
"{{tracker_name}}" resources tracker detected {{changes_count}} changes
+
"{{tracker_name}}" tracker detected {{changes_count}} changes in resources
{{> email_styles}}
-
+ {{> tracker_styles}}
-
"{{tracker_name}}" resources tracker detected {{changes_count}} changes
+
"{{tracker_name}}" tracker detected {{changes_count}} changes in resources
To learn more, visit the Resources trackers page:
Web Scraping → Resources trackers
If the button above doesn't work, you can navigate to the following URL directly:
diff --git a/assets/templates/web_page_resources_tracker_changes_error_email.hbs b/assets/templates/web_page_resources_tracker_changes_error_email.hbs
new file mode 100644
index 0000000..599a2c7
--- /dev/null
+++ b/assets/templates/web_page_resources_tracker_changes_error_email.hbs
@@ -0,0 +1,21 @@
+
+
+
+
"{{tracker_name}}" tracker failed to check for changes in resources
+
+
+ {{> email_styles}}
+ {{> tracker_styles}}
+
+
+
+
"{{tracker_name}}" tracker failed to check for changes in resources
+
There was an error while checking resources: {{error_message}}.
+
To check the tracker configuration and re-try, visit the Resources trackers page:
+
Web Scraping → Resources trackers
+
If the button above doesn't work, you can navigate to the following URL directly:
+
{{back_link}}
+
+
+
+
diff --git a/src/notifications/notification_content_template.rs b/src/notifications/notification_content_template.rs
index 24814f2..a4f518c 100644
--- a/src/notifications/notification_content_template.rs
+++ b/src/notifications/notification_content_template.rs
@@ -22,9 +22,11 @@ pub enum NotificationContentTemplate {
WebPageResourcesTrackerChanges {
tracker_name: String,
changes_count: usize,
+        error_message: Option<String>,
},
WebPageContentTrackerChanges {
tracker_name: String,
+        error_message: Option<String>,
},
}
@@ -44,16 +46,26 @@ impl NotificationContentTemplate {
NotificationContentTemplate::WebPageResourcesTrackerChanges {
tracker_name,
changes_count,
+ error_message,
} => {
web_page_resources_tracker_changes::compile_to_email(
api,
tracker_name,
*changes_count,
+ error_message.as_deref(),
)
.await
}
- NotificationContentTemplate::WebPageContentTrackerChanges { tracker_name } => {
- web_page_content_tracker_changes::compile_to_email(api, tracker_name).await
+ NotificationContentTemplate::WebPageContentTrackerChanges {
+ tracker_name,
+ error_message,
+ } => {
+ web_page_content_tracker_changes::compile_to_email(
+ api,
+ tracker_name,
+ error_message.as_deref(),
+ )
+ .await
}
}
}
@@ -147,6 +159,61 @@ mod tests {
let mut template = NotificationContentTemplate::WebPageResourcesTrackerChanges {
tracker_name: "tracker".to_string(),
changes_count: 10,
+ error_message: None,
+ }
+ .compile_to_email(&api)
+ .await?;
+ template
+ .attachments
+ .as_mut()
+ .unwrap()
+ .iter_mut()
+ .for_each(|a| {
+ a.content = a.content.len().to_be_bytes().iter().cloned().collect_vec();
+ });
+
+ assert_debug_snapshot!(template, @r###"
+ EmailNotificationContent {
+ subject: "[Secutils.dev] Change detected: \"tracker\"",
+ text: "\"tracker\" tracker detected 10 changes in resources. Visit http://localhost:1234/ws/web_scraping__resources to learn more.",
+ html: Some(
+ "\n\n\n \"tracker\" tracker detected 10 changes in resources\n \n \n \n \n\n\n\n
\"tracker\" tracker detected 10 changes in resources
\n
To learn more, visit the Resources trackers page:
\n
Web Scraping → Resources trackers\n
If the button above doesn't work, you can navigate to the following URL directly:
\n
http://localhost:1234/ws/web_scraping__resources
\n
\n
\n\n\n",
+ ),
+ attachments: Some(
+ [
+ EmailNotificationAttachment {
+ disposition: Inline(
+ "secutils-logo",
+ ),
+ content_type: "image/png",
+ content: [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 15,
+ 165,
+ ],
+ },
+ ],
+ ),
+ }
+ "###
+ );
+
+ Ok(())
+ }
+
+ #[tokio::test]
+ async fn can_compile_resources_tracker_changes_error_template_to_email() -> anyhow::Result<()> {
+ let api = mock_api().await?;
+
+ let mut template = NotificationContentTemplate::WebPageResourcesTrackerChanges {
+ tracker_name: "tracker".to_string(),
+ changes_count: 0,
+ error_message: Some("Something went wrong".to_string()),
}
.compile_to_email(&api)
.await?;
@@ -161,10 +228,10 @@ mod tests {
assert_debug_snapshot!(template, @r###"
EmailNotificationContent {
- subject: "Notification: \"tracker\" resources tracker detected 10 changes",
- text: "\"tracker\" resources tracker detected 10 changes. Visit http://localhost:1234/ws/web_scraping__resources to learn more.",
+ subject: "[Secutils.dev] Check failed: \"tracker\"",
+ text: "\"tracker\" tracker failed to check for changes in resources due to the following error: Something went wrong. Visit http://localhost:1234/ws/web_scraping__resources to learn more.",
html: Some(
- "\n\n\n \"tracker\" resources tracker detected 10 changes\n \n \n \n \n\n\n\n
\"tracker\" resources tracker detected 10 changes
\n
To learn more, visit the Resources trackers page:
\n
Web Scraping → Resources trackers\n
If the button above doesn't work, you can navigate to the following URL directly:
\n
http://localhost:1234/ws/web_scraping__resources
\n
\n
\n\n\n",
+ "\n\n\n \"tracker\" tracker failed to check for changes in resources\n \n \n \n \n\n\n\n
\"tracker\" tracker failed to check for changes in resources
\n
There was an error while checking resources: Something went wrong.
\n
To check the tracker configuration and re-try, visit the Resources trackers page:
\n
Web Scraping → Resources trackers\n
If the button above doesn't work, you can navigate to the following URL directly:
\n
http://localhost:1234/ws/web_scraping__resources
\n
\n
\n\n\n",
),
attachments: Some(
[
@@ -199,6 +266,60 @@ mod tests {
let mut template = NotificationContentTemplate::WebPageContentTrackerChanges {
tracker_name: "tracker".to_string(),
+ error_message: None,
+ }
+ .compile_to_email(&api)
+ .await?;
+ template
+ .attachments
+ .as_mut()
+ .unwrap()
+ .iter_mut()
+ .for_each(|a| {
+ a.content = a.content.len().to_be_bytes().iter().cloned().collect_vec();
+ });
+
+ assert_debug_snapshot!(template, @r###"
+ EmailNotificationContent {
+ subject: "[Secutils.dev] Change detected: \"tracker\"",
+ text: "\"tracker\" tracker detected content changes. Visit http://localhost:1234/ws/web_scraping__content to learn more.",
+ html: Some(
+ "\n\n\n \"tracker\" tracker detected content changes\n \n \n \n \n\n\n\n
\"tracker\" tracker detected content changes
\n
To learn more, visit the Content trackers page:
\n
Web Scraping → Content trackers\n
If the button above doesn't work, you can navigate to the following URL directly:
\n
http://localhost:1234/ws/web_scraping__content
\n
\n
\n\n\n",
+ ),
+ attachments: Some(
+ [
+ EmailNotificationAttachment {
+ disposition: Inline(
+ "secutils-logo",
+ ),
+ content_type: "image/png",
+ content: [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 15,
+ 165,
+ ],
+ },
+ ],
+ ),
+ }
+ "###
+ );
+
+ Ok(())
+ }
+
+ #[tokio::test]
+ async fn can_compile_content_tracker_changes_error_template_to_email() -> anyhow::Result<()> {
+ let api = mock_api().await?;
+
+ let mut template = NotificationContentTemplate::WebPageContentTrackerChanges {
+ tracker_name: "tracker".to_string(),
+ error_message: Some("Something went wrong".to_string()),
}
.compile_to_email(&api)
.await?;
@@ -213,10 +334,10 @@ mod tests {
assert_debug_snapshot!(template, @r###"
EmailNotificationContent {
- subject: "Notification: \"tracker\" content tracker detected changes",
- text: "\"tracker\" content tracker detected changes. Visit http://localhost:1234/ws/web_scraping__content to learn more.",
+ subject: "[Secutils.dev] Check failed: \"tracker\"",
+ text: "\"tracker\" tracker failed to check for content changes due to the following error: Something went wrong. Visit http://localhost:1234/ws/web_scraping__content to learn more.",
html: Some(
- "\n\n\n \"tracker\" content tracker detected changes\n \n \n \n \n\n\n\n
\"tracker\" content tracker detected changes
\n
To learn more, visit the Content trackers page:
\n
Web Scraping → Content trackers\n
If the button above doesn't work, you can navigate to the following URL directly:
\n
http://localhost:1234/ws/web_scraping__content
\n
\n
\n\n\n",
+ "\n\n\n \"tracker\" tracker failed to check for content changes\n \n \n \n \n\n\n\n
\"tracker\" tracker failed to check for content changes
\n
There was an error while checking content: Something went wrong.
\n
To check the tracker configuration and re-try, visit the Content trackers page:
\n
Web Scraping → Content trackers\n
If the button above doesn't work, you can navigate to the following URL directly:
\n
http://localhost:1234/ws/web_scraping__content
\n
\n
\n\n\n",
),
attachments: Some(
[
diff --git a/src/notifications/notification_content_template/web_page_content_tracker_changes.rs b/src/notifications/notification_content_template/web_page_content_tracker_changes.rs
index 4c16ae8..46e93a8 100644
--- a/src/notifications/notification_content_template/web_page_content_tracker_changes.rs
+++ b/src/notifications/notification_content_template/web_page_content_tracker_changes.rs
@@ -12,25 +12,49 @@ pub const NOTIFICATION_LOGO_BYTES: &[u8] =
pub async fn compile_to_email(
api: &Api,
tracker_name: &str,
+ error_message: Option<&str>,
) -> anyhow::Result<EmailNotificationContent> {
let back_link = format!("{}ws/web_scraping__content", api.config.public_url);
- Ok(EmailNotificationContent::html_with_attachments(
- format!(
- "Notification: \"{}\" content tracker detected changes",
- tracker_name
- ),
- format!(
- "\"{}\" content tracker detected changes. Visit {} to learn more.",
- tracker_name, back_link
- ),
- api.templates.render(
- "web_page_content_tracker_changes_email",
- &json!({
+
+ let (subject, text, html) = if let Some(error_message) = error_message {
+ (
+ format!("[Secutils.dev] Check failed: \"{}\"", tracker_name),
+ format!(
+ "\"{}\" tracker failed to check for content changes due to the following error: {error_message}. Visit {} to learn more.",
+ tracker_name, back_link
+ ),
+ api.templates.render(
+ "web_page_content_tracker_changes_error_email",
+ &json!({
"tracker_name": tracker_name,
+ "error_message": error_message,
"back_link": back_link,
"home_link": api.config.public_url.as_str(),
}),
- )?,
+ )?
+ )
+ } else {
+ (
+ format!("[Secutils.dev] Change detected: \"{}\"", tracker_name),
+ format!(
+ "\"{}\" tracker detected content changes. Visit {} to learn more.",
+ tracker_name, back_link
+ ),
+ api.templates.render(
+ "web_page_content_tracker_changes_email",
+ &json!({
+ "tracker_name": tracker_name,
+ "back_link": back_link,
+ "home_link": api.config.public_url.as_str(),
+ }),
+ )?,
+ )
+ };
+
+ Ok(EmailNotificationContent::html_with_attachments(
+ subject,
+ text,
+ html,
vec![EmailNotificationAttachment::inline(
"secutils-logo",
"image/png",
diff --git a/src/notifications/notification_content_template/web_page_resources_tracker_changes.rs b/src/notifications/notification_content_template/web_page_resources_tracker_changes.rs
index 8ba76a0..f249c0d 100644
--- a/src/notifications/notification_content_template/web_page_resources_tracker_changes.rs
+++ b/src/notifications/notification_content_template/web_page_resources_tracker_changes.rs
@@ -13,26 +13,50 @@ pub async fn compile_to_email(
api: &Api,
tracker_name: &str,
changes_count: usize,
+ error_message: Option<&str>,
) -> anyhow::Result<EmailNotificationContent> {
let back_link = format!("{}ws/web_scraping__resources", api.config.public_url);
+
+ let (subject, text, html) = if let Some(error_message) = error_message {
+ (
+ format!("[Secutils.dev] Check failed: \"{}\"", tracker_name),
+ format!(
+ "\"{}\" tracker failed to check for changes in resources due to the following error: {error_message}. Visit {} to learn more.",
+ tracker_name, back_link
+ ),
+ api.templates.render(
+ "web_page_resources_tracker_changes_error_email",
+ &json!({
+ "tracker_name": tracker_name,
+ "error_message": error_message,
+ "back_link": back_link,
+ "home_link": api.config.public_url.as_str(),
+ }),
+ )?
+ )
+ } else {
+ (
+ format!("[Secutils.dev] Change detected: \"{}\"", tracker_name),
+ format!(
+ "\"{}\" tracker detected {} changes in resources. Visit {} to learn more.",
+ tracker_name, changes_count, back_link
+ ),
+ api.templates.render(
+ "web_page_resources_tracker_changes_email",
+ &json!({
+ "tracker_name": tracker_name,
+ "changes_count": changes_count,
+ "back_link": back_link,
+ "home_link": api.config.public_url.as_str(),
+ }),
+ )?,
+ )
+ };
+
Ok(EmailNotificationContent::html_with_attachments(
- format!(
- "Notification: \"{}\" resources tracker detected {} changes",
- tracker_name, changes_count
- ),
- format!(
- "\"{}\" resources tracker detected {} changes. Visit {} to learn more.",
- tracker_name, changes_count, back_link
- ),
- api.templates.render(
- "web_page_resources_tracker_changes_email",
- &json!({
- "tracker_name": tracker_name,
- "changes_count": changes_count,
- "back_link": back_link,
- "home_link": api.config.public_url.as_str(),
- }),
- )?,
+ subject,
+ text,
+ html,
vec![EmailNotificationAttachment::inline(
"secutils-logo",
"image/png",
diff --git a/src/scheduler/scheduler_jobs/web_page_trackers_fetch_job.rs b/src/scheduler/scheduler_jobs/web_page_trackers_fetch_job.rs
index bf153b4..14923c5 100644
--- a/src/scheduler/scheduler_jobs/web_page_trackers_fetch_job.rs
+++ b/src/scheduler/scheduler_jobs/web_page_trackers_fetch_job.rs
@@ -1,5 +1,6 @@
use crate::{
api::Api,
+ error::Error as SecutilsError,
network::{DnsResolver, EmailTransport, EmailTransportError},
notifications::{NotificationContent, NotificationContentTemplate, NotificationDestination},
scheduler::scheduler_job::SchedulerJob,
@@ -118,20 +119,41 @@ impl WebPageTrackersFetchJob {
// Create a new revision and retrieve a diff if any changes from the previous version are
// detected. If there are any changes and the tracker hasn't opted out of notifications,
// schedule a notification about the detected changes.
- let new_revision_with_diff =
- match web_scraping
- .create_resources_tracker_revision(tracker.user_id, tracker.id)
- .await
- {
- Ok(new_revision_with_diff) => new_revision_with_diff,
- Err(err) => {
- log::error!(
+ let new_revision_with_diff = match web_scraping
+ .create_resources_tracker_revision(tracker.user_id, tracker.id)
+ .await
+ {
+ Ok(new_revision_with_diff) => new_revision_with_diff,
+ Err(err) => {
+ log::error!(
"Failed to create web page tracker ('{}') history revision, took {}: {:?}.",
- tracker.id, humantime::format_duration(fetch_start.elapsed()), err
+ tracker.id,
+ humantime::format_duration(fetch_start.elapsed()),
+ err
);
- continue;
- }
- };
+
+ // Notify user about the error and re-schedule the job.
+ let tracker_name = tracker.name.clone();
+ Self::try_notify_user(
+ &api,
+ tracker,
+ NotificationContentTemplate::WebPageResourcesTrackerChanges {
+ tracker_name,
+ changes_count: 0,
+ error_message: Some(
+                            err.downcast::<SecutilsError>()
+ .map(|err| format!("{}", err))
+ .unwrap_or_else(|_| "Unknown error".to_string()),
+ ),
+ },
+ )
+ .await;
+ api.db
+ .set_scheduler_job_stopped_state(job_id, false)
+ .await?;
+ continue;
+ }
+ };
log::debug!(
"Successfully created web page tracker ('{}') history revision, took {}.",
tracker.id,
@@ -153,26 +175,17 @@ impl WebPageTrackersFetchJob {
.filter(|resource| resource.diff_status.is_some()),
)
.count();
- let notification_schedule_result = api
- .notifications()
- .schedule_notification(
- NotificationDestination::User(tracker.user_id),
- NotificationContent::Template(
- NotificationContentTemplate::WebPageResourcesTrackerChanges {
- tracker_name: tracker.name,
- changes_count,
- },
- ),
- OffsetDateTime::now_utc(),
- )
- .await;
- if let Err(err) = notification_schedule_result {
- log::error!(
- "Failed to schedule a notification for web page tracker ('{}'): {:?}.",
- tracker.id,
- err
- );
- }
+ let tracker_name = tracker.name.clone();
+ Self::try_notify_user(
+ &api,
+ tracker,
+ NotificationContentTemplate::WebPageResourcesTrackerChanges {
+ tracker_name,
+ changes_count,
+ error_message: None,
+ },
+ )
+ .await;
}
}
@@ -218,6 +231,25 @@ impl WebPageTrackersFetchJob {
humantime::format_duration(fetch_start.elapsed()),
err
);
+
+ // Notify user about the error and re-schedule the job.
+ let tracker_name = tracker.name.clone();
+ Self::try_notify_user(
+ &api,
+ tracker,
+ NotificationContentTemplate::WebPageContentTrackerChanges {
+ tracker_name,
+ error_message: Some(
+                        err.downcast::<SecutilsError>()
+ .map(|err| format!("{}", err))
+ .unwrap_or_else(|_| "Unknown error".to_string()),
+ ),
+ },
+ )
+ .await;
+ api.db
+ .set_scheduler_job_stopped_state(job_id, false)
+ .await?;
continue;
}
};
@@ -227,26 +259,17 @@ impl WebPageTrackersFetchJob {
humantime::format_duration(fetch_start.elapsed())
);
- if tracker.settings.enable_notifications && new_revision.is_some() {
- let notification_schedule_result = api
- .notifications()
- .schedule_notification(
- NotificationDestination::User(tracker.user_id),
- NotificationContent::Template(
- NotificationContentTemplate::WebPageContentTrackerChanges {
- tracker_name: tracker.name,
- },
- ),
- OffsetDateTime::now_utc(),
- )
- .await;
- if let Err(err) = notification_schedule_result {
- log::error!(
- "Failed to schedule a notification for web page tracker ('{}'): {:?}.",
- tracker.id,
- err
- );
- }
+ if new_revision.is_some() {
+ let tracker_name = tracker.name.clone();
+ Self::try_notify_user(
+ &api,
+ tracker,
+ NotificationContentTemplate::WebPageContentTrackerChanges {
+ tracker_name,
+ error_message: None,
+ },
+ )
+ .await;
}
api.db
@@ -287,6 +310,34 @@ impl WebPageTrackersFetchJob {
Ok(Some((tracker, job_id)))
}
+
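+    /// Schedules a notification for the tracker's user unless the tracker has opted out of
+    /// notifications. Scheduling failures are logged and don't interrupt the fetch job.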
+    async fn try_notify_user<DR: DnsResolver, ET: EmailTransport, Tag: WebPageTrackerTag>(
+        api: &Api<DR, ET>,
+        tracker: WebPageTracker<Tag>,
+ template: NotificationContentTemplate,
+ ) where
+ ET::Error: EmailTransportError,
+ {
+ if !tracker.settings.enable_notifications {
+ return;
+ }
+
+ let notification_schedule_result = api
+ .notifications()
+ .schedule_notification(
+ NotificationDestination::User(tracker.user_id),
+ NotificationContent::Template(template),
+ OffsetDateTime::now_utc(),
+ )
+ .await;
+ if let Err(err) = notification_schedule_result {
+ log::error!(
+ "Failed to schedule a notification for web page tracker ('{}'): {:?}.",
+ tracker.id,
+ err
+ );
+ }
+ }
}
#[cfg(test)]
@@ -303,8 +354,9 @@ mod tests {
WebPageResourceContentData, WebPageResourcesData, WebPageResourcesTrackerTag,
WebPageTracker, WebPageTrackerCreateParams, WebPageTrackerKind, WebPageTrackerSettings,
WebScraperContentRequest, WebScraperContentRequestScripts, WebScraperContentResponse,
- WebScraperResource, WebScraperResourcesRequest, WebScraperResourcesRequestScripts,
- WebScraperResourcesResponse, WEB_PAGE_CONTENT_TRACKER_EXTRACT_SCRIPT_NAME,
+ WebScraperErrorResponse, WebScraperResource, WebScraperResourcesRequest,
+ WebScraperResourcesRequestScripts, WebScraperResourcesResponse,
+ WEB_PAGE_CONTENT_TRACKER_EXTRACT_SCRIPT_NAME,
WEB_PAGE_RESOURCES_TRACKER_FILTER_SCRIPT_NAME,
},
};
@@ -979,6 +1031,7 @@ mod tests {
WebPageResourcesTrackerChanges {
tracker_name: "tracker-one",
changes_count: 2,
+ error_message: None,
},
),
),
@@ -1002,6 +1055,172 @@ mod tests {
Ok(())
}
+ #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+ async fn schedules_notification_when_resources_change_check_fails() -> anyhow::Result<()> {
+ let mut config = mock_config()?;
+ config.jobs.web_page_trackers_fetch = Schedule::try_from(mock_schedule_in_sec(3).as_str())?;
+
+ let server = MockServer::start();
+ config.components.web_scraper_url = Url::parse(&server.base_url())?;
+
+ let user = mock_user()?;
+ let api = Arc::new(mock_api_with_config(config).await?);
+ let mut scheduler = JobScheduler::new_with_storage_and_code(
+ Box::new(SchedulerStore::new(api.db.clone())),
+            Box::<SimpleNotificationStore>::default(),
+            Box::<SimpleJobCode>::default(),
+            Box::<SimpleNotificationCode>::default(),
+ )
+ .await?;
+
+ // Make sure that the tracker is only run once during a single minute (2 seconds after the
+ // current second).
+ let tracker_schedule = mock_schedule_in_sec(1);
+
+ // Create user, tracker and tracker job.
+ api.users().upsert(user.clone()).await?;
+
+ let trigger_job_id = scheduler
+ .add(
+ WebPageTrackersTriggerJob::create(
+ api.clone(),
+ tracker_schedule.clone(),
+ WebPageTrackerKind::WebPageResources,
+ )
+ .await?,
+ )
+ .await?;
+        let tracker = WebPageTracker::<WebPageResourcesTrackerTag> {
+ id: Uuid::now_v7(),
+ name: "tracker-one".to_string(),
+ url: "https://localhost:1234/my/app?q=2".parse()?,
+ settings: WebPageTrackerSettings {
+ revisions: 2,
+ schedule: Some(tracker_schedule),
+ delay: Duration::from_secs(2),
+ scripts: Default::default(),
+ enable_notifications: true,
+ },
+ user_id: user.id,
+ job_id: Some(trigger_job_id),
+ // Preserve timestamp only up to seconds.
+ created_at: OffsetDateTime::from_unix_timestamp(
+ OffsetDateTime::now_utc().unix_timestamp(),
+ )?,
+ meta: None,
+ };
+
+ // Insert tracker directly to DB to bypass schedule validation.
+ api.db
+ .web_scraping(user.id)
+ .insert_web_page_tracker(&tracker)
+ .await?;
+ api.db
+ .web_scraping(user.id)
+            .insert_web_page_tracker_history_revision::<WebPageResourcesTrackerTag>(
+ &WebPageDataRevision {
+ id: uuid!("00000000-0000-0000-0000-000000000001"),
+ tracker_id: tracker.id,
+ created_at: OffsetDateTime::from_unix_timestamp(946720700)?,
+ data: WebPageResourcesData {
+ scripts: vec![],
+ styles: vec![],
+ },
+ },
+ )
+ .await?;
+
+ // Schedule fetch job
+ scheduler
+ .add(WebPageTrackersFetchJob::create(api.clone()).await?)
+ .await?;
+
+ let resources_mock = server.mock(|when, then| {
+ when.method(httpmock::Method::POST)
+ .path("/api/web_page/resources")
+ .json_body(
+ serde_json::to_value(
+ WebScraperResourcesRequest::with_default_parameters(&tracker.url)
+ .set_delay(Duration::from_millis(2000)),
+ )
+ .unwrap(),
+ );
+ then.status(400)
+ .header("Content-Type", "application/json")
+ .json_body_obj(&WebScraperErrorResponse {
+ message: "some client-error".to_string(),
+ });
+ });
+
+ // Start scheduler and wait for a few seconds, then stop it.
+ scheduler.start().await?;
+
+ while api
+ .db
+ .get_notification_ids(
+ OffsetDateTime::now_utc().add(Duration::from_secs(3600 * 24 * 365)),
+ 10,
+ )
+            .collect::<Vec<_>>()
+ .await
+ .is_empty()
+ {
+ thread::sleep(Duration::from_millis(100));
+ }
+
+ scheduler.shutdown().await?;
+
+ resources_mock.assert();
+
+ let mut notification_ids = api
+ .db
+ .get_notification_ids(
+ OffsetDateTime::now_utc().add(Duration::from_secs(3600 * 24 * 365)),
+ 10,
+ )
+            .collect::<Vec<_>>()
+ .await;
+ assert_eq!(notification_ids.len(), 1);
+
+ let notification = api.db.get_notification(notification_ids.remove(0)?).await?;
+ assert_debug_snapshot!(notification.map(|notification| (notification.destination, notification.content)), @r###"
+ Some(
+ (
+ User(
+ UserId(
+ 1,
+ ),
+ ),
+ Template(
+ WebPageResourcesTrackerChanges {
+ tracker_name: "tracker-one",
+ changes_count: 0,
+ error_message: Some(
+ "some client-error",
+ ),
+ },
+ ),
+ ),
+ )
+ "###);
+
+ assert_eq!(
+ api.web_scraping()
+ .get_resources_tracker_history(user.id, tracker.id, Default::default())
+ .await?
+ .len(),
+ 1
+ );
+ assert!(!api
+ .db
+ .get_scheduler_job(trigger_job_id)
+ .await?
+ .map(|job| job.stopped)
+ .unwrap_or_default());
+
+ Ok(())
+ }
+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn schedules_notification_when_content_change() -> anyhow::Result<()> {
let mut config = mock_config()?;
@@ -1143,6 +1362,7 @@ mod tests {
Template(
WebPageContentTrackerChanges {
tracker_name: "tracker-one",
+ error_message: None,
},
),
),
@@ -1165,4 +1385,167 @@ mod tests {
Ok(())
}
+
+ #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+ async fn schedules_notification_when_content_change_check_fails() -> anyhow::Result<()> {
+ let mut config = mock_config()?;
+ config.jobs.web_page_trackers_fetch = Schedule::try_from(mock_schedule_in_sec(3).as_str())?;
+
+ let server = MockServer::start();
+ config.components.web_scraper_url = Url::parse(&server.base_url())?;
+
+ let user = mock_user()?;
+ let api = Arc::new(mock_api_with_config(config).await?);
+ let mut scheduler = JobScheduler::new_with_storage_and_code(
+ Box::new(SchedulerStore::new(api.db.clone())),
+            Box::<SimpleNotificationStore>::default(),
+            Box::<SimpleJobCode>::default(),
+            Box::<SimpleNotificationCode>::default(),
+ )
+ .await?;
+
+ // Make sure that the tracker is only run once during a single minute (2 seconds after the
+ // current second).
+ let tracker_schedule = mock_schedule_in_sec(1);
+
+ // Create user, tracker and tracker job.
+ api.users().upsert(user.clone()).await?;
+
+ let trigger_job_id = scheduler
+ .add(
+ WebPageTrackersTriggerJob::create(
+ api.clone(),
+ tracker_schedule.clone(),
+ WebPageTrackerKind::WebPageContent,
+ )
+ .await?,
+ )
+ .await?;
+        let tracker = WebPageTracker::<WebPageContentTrackerTag> {
+ id: Uuid::now_v7(),
+ name: "tracker-one".to_string(),
+ url: "https://localhost:1234/my/app?q=2".parse()?,
+ settings: WebPageTrackerSettings {
+ revisions: 2,
+ schedule: Some(tracker_schedule),
+ delay: Duration::from_secs(2),
+ scripts: Default::default(),
+ enable_notifications: true,
+ },
+ user_id: user.id,
+ job_id: Some(trigger_job_id),
+ // Preserve timestamp only up to seconds.
+ created_at: OffsetDateTime::from_unix_timestamp(
+ OffsetDateTime::now_utc().unix_timestamp(),
+ )?,
+ meta: None,
+ };
+
+ // Insert tracker directly to DB to bypass schedule validation.
+ api.db
+ .web_scraping(user.id)
+ .insert_web_page_tracker(&tracker)
+ .await?;
+ api.db
+ .web_scraping(user.id)
+            .insert_web_page_tracker_history_revision::<WebPageContentTrackerTag>(
+ &WebPageDataRevision {
+ id: uuid!("00000000-0000-0000-0000-000000000001"),
+ tracker_id: tracker.id,
+ created_at: OffsetDateTime::from_unix_timestamp(946720700)?,
+ data: "some-content".to_string(),
+ },
+ )
+ .await?;
+
+ // Schedule fetch job
+ scheduler
+ .add(WebPageTrackersFetchJob::create(api.clone()).await?)
+ .await?;
+
+ let content_mock = server.mock(|when, then| {
+ when.method(httpmock::Method::POST)
+ .path("/api/web_page/content")
+ .json_body(
+ serde_json::to_value(
+ WebScraperContentRequest::with_default_parameters(&tracker.url)
+ .set_delay(Duration::from_millis(2000))
+ .set_previous_content("some-content"),
+ )
+ .unwrap(),
+ );
+ then.status(400)
+ .header("Content-Type", "application/json")
+ .json_body_obj(&WebScraperErrorResponse {
+ message: "some client-error".to_string(),
+ });
+ });
+
+ // Start scheduler and wait for a few seconds, then stop it.
+ scheduler.start().await?;
+
+ while api
+ .db
+ .get_notification_ids(
+ OffsetDateTime::now_utc().add(Duration::from_secs(3600 * 24 * 365)),
+ 10,
+ )
+            .collect::<Vec<_>>()
+ .await
+ .is_empty()
+ {
+ thread::sleep(Duration::from_millis(100));
+ }
+
+ scheduler.shutdown().await?;
+
+ content_mock.assert();
+
+ let mut notification_ids = api
+ .db
+ .get_notification_ids(
+ OffsetDateTime::now_utc().add(Duration::from_secs(3600 * 24 * 365)),
+ 10,
+ )
+            .collect::<Vec<_>>()
+ .await;
+ assert_eq!(notification_ids.len(), 1);
+
+ let notification = api.db.get_notification(notification_ids.remove(0)?).await?;
+ assert_debug_snapshot!(notification.map(|notification| (notification.destination, notification.content)), @r###"
+ Some(
+ (
+ User(
+ UserId(
+ 1,
+ ),
+ ),
+ Template(
+ WebPageContentTrackerChanges {
+ tracker_name: "tracker-one",
+ error_message: Some(
+ "some client-error",
+ ),
+ },
+ ),
+ ),
+ )
+ "###);
+
+ assert_eq!(
+ api.web_scraping()
+ .get_content_tracker_history(user.id, tracker.id, Default::default())
+ .await?
+ .len(),
+ 1
+ );
+ assert!(!api
+ .db
+ .get_scheduler_job(trigger_job_id)
+ .await?
+ .map(|job| job.stopped)
+ .unwrap_or_default());
+
+ Ok(())
+ }
}
diff --git a/src/utils.rs b/src/utils.rs
index 915eeb3..f15d935 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -37,8 +37,8 @@ pub use self::{
WebPageResourceContentData, WebPageResourceDiffStatus, WebPageResourcesData,
WebPageResourcesTrackerGetHistoryParams, WebPageResourcesTrackerTag, WebPageTracker,
WebPageTrackerCreateParams, WebPageTrackerKind, WebPageTrackerSettings, WebPageTrackerTag,
- WebPageTrackerUpdateParams, WebScraperContentError, WebScraperContentRequest,
- WebScraperContentRequestScripts, WebScraperContentResponse, WebScraperResource,
+ WebPageTrackerUpdateParams, WebScraperContentRequest, WebScraperContentRequestScripts,
+ WebScraperContentResponse, WebScraperErrorResponse, WebScraperResource,
WebScraperResourcesRequest, WebScraperResourcesRequestScripts, WebScraperResourcesResponse,
WEB_PAGE_CONTENT_TRACKER_EXTRACT_SCRIPT_NAME,
WEB_PAGE_RESOURCES_TRACKER_FILTER_SCRIPT_NAME,
diff --git a/src/utils/web_scraping.rs b/src/utils/web_scraping.rs
index 7ba7bf4..8d1c294 100644
--- a/src/utils/web_scraping.rs
+++ b/src/utils/web_scraping.rs
@@ -15,8 +15,8 @@ pub use self::{
WebPageResource, WebPageResourceContent, WebPageResourceContentData,
WebPageResourceDiffStatus, WebPageResourcesData, WebPageResourcesTrackerTag,
WebPageTracker, WebPageTrackerKind, WebPageTrackerSettings, WebPageTrackerTag,
- WebScraperContentError, WebScraperContentRequest, WebScraperContentRequestScripts,
- WebScraperContentResponse, WebScraperResource, WebScraperResourcesRequest,
+ WebScraperContentRequest, WebScraperContentRequestScripts, WebScraperContentResponse,
+ WebScraperErrorResponse, WebScraperResource, WebScraperResourcesRequest,
WebScraperResourcesRequestScripts, WebScraperResourcesResponse, MAX_WEB_PAGE_TRACKER_DELAY,
MAX_WEB_PAGE_TRACKER_REVISIONS,
},
diff --git a/src/utils/web_scraping/api_ext.rs b/src/utils/web_scraping/api_ext.rs
index b358c08..eef3d87 100644
--- a/src/utils/web_scraping/api_ext.rs
+++ b/src/utils/web_scraping/api_ext.rs
@@ -22,8 +22,8 @@ use crate::{
WebScraperResource, MAX_WEB_PAGE_TRACKER_DELAY, MAX_WEB_PAGE_TRACKER_REVISIONS,
},
WebPageContentTrackerTag, WebPageDataRevision, WebPageResource, WebPageResourcesData,
- WebPageResourcesTrackerTag, WebPageTracker, WebPageTrackerTag, WebScraperContentError,
- WebScraperContentRequest, WebScraperContentRequestScripts, WebScraperContentResponse,
+ WebPageResourcesTrackerTag, WebPageTracker, WebPageTrackerTag, WebScraperContentRequest,
+ WebScraperContentRequestScripts, WebScraperContentResponse, WebScraperErrorResponse,
WebScraperResourcesRequest, WebScraperResourcesRequestScripts, WebScraperResourcesResponse,
},
};
@@ -270,17 +270,47 @@ impl<'a, DR: DnsResolver, ET: EmailTransport> WebScrapingApiExt<'a, DR, ET> {
))
.json(&scraper_request)
.send()
- .await?
-            .json::<WebScraperResourcesResponse>()
.await
.map_err(|err| {
- log::error!(
- "Cannot fetch resources for `{}` ('{}'): {:?}",
- tracker.url,
+ anyhow!(
+ "Could not connect to the web scraper service to extract resources for the web tracker ('{}'): {:?}",
+ tracker.id,
+ err
+ )
+ })?;
+
+ if !scraper_response.status().is_success() {
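+        // Surface client (4xx) errors from the scraper to the user as-is; treat any other
+        // non-success response as an unexpected internal error.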
+ let is_client_error = scraper_response.status().is_client_error();
+ let scraper_error_response = scraper_response
+            .json::<WebScraperErrorResponse>()
+ .await
+ .map_err(|err| {
+ anyhow!(
+ "Could not deserialize scraper error response for the web tracker ('{}'): {:?}",
tracker.id,
err
+ )
+ })?;
+ if is_client_error {
+ bail!(SecutilsError::client(scraper_error_response.message));
+ } else {
+ bail!(
+ "Unexpected scraper error for the web tracker ('{}'): {:?}",
+ tracker.id,
+ scraper_error_response.message
);
- anyhow!("Web page tracker cannot fetch resources due to unexpected error")
+ }
+ }
+
+ let scraper_response = scraper_response
+            .json::<WebScraperResourcesResponse>()
+ .await
+ .map_err(|err| {
+ anyhow!(
+ "Could not deserialize scraper response for the web tracker ('{}'): {:?}",
+ tracker.id,
+ err
+ )
})?;
// Check if there is a revision with the same timestamp. If so, drop newly fetched revision.
@@ -425,15 +455,15 @@ impl<'a, DR: DnsResolver, ET: EmailTransport> WebScrapingApiExt<'a, DR, ET> {
if !scraper_response.status().is_success() {
let is_client_error = scraper_response.status().is_client_error();
let scraper_error_response = scraper_response
-            .json::<WebScraperContentError>()
+            .json::<WebScraperErrorResponse>()
.await
.map_err(|err| {
- anyhow!(
- "Could not deserialize scraper error response for the web tracker ('{}'): {:?}",
- tracker.id,
- err
- )
- })?;
+ anyhow!(
+ "Could not deserialize scraper error response for the web tracker ('{}'): {:?}",
+ tracker.id,
+ err
+ )
+ })?;
if is_client_error {
bail!(SecutilsError::client(scraper_error_response.message));
} else {
@@ -896,11 +926,11 @@ mod tests {
utils::{
web_scraping::WebScrapingApiExt, WebPageContentTrackerGetHistoryParams,
WebPageContentTrackerTag, WebPageResource, WebPageResourceDiffStatus,
- WebPageResourcesTrackerTag, WebPageTracker, WebPageTrackerCreateParams,
- WebPageTrackerKind, WebPageTrackerSettings, WebPageTrackerUpdateParams,
- WebScraperContentError, WebScraperContentRequest, WebScraperContentResponse,
- WebScraperResource, WebScraperResourcesRequest, WebScraperResourcesResponse,
- WEB_PAGE_CONTENT_TRACKER_EXTRACT_SCRIPT_NAME,
+ WebPageResourcesTrackerGetHistoryParams, WebPageResourcesTrackerTag, WebPageTracker,
+ WebPageTrackerCreateParams, WebPageTrackerKind, WebPageTrackerSettings,
+ WebPageTrackerUpdateParams, WebScraperContentRequest, WebScraperContentResponse,
+ WebScraperErrorResponse, WebScraperResource, WebScraperResourcesRequest,
+ WebScraperResourcesResponse, WEB_PAGE_CONTENT_TRACKER_EXTRACT_SCRIPT_NAME,
WEB_PAGE_RESOURCES_TRACKER_FILTER_SCRIPT_NAME,
},
};
@@ -2591,6 +2621,80 @@ mod tests {
Ok(())
}
+ #[tokio::test]
+ async fn properly_forwards_error_if_web_page_resources_extraction_fails() -> anyhow::Result<()>
+ {
+ let server = MockServer::start();
+ let mut config = mock_config()?;
+ config.components.web_scraper_url = Url::parse(&server.base_url())?;
+
+ let api = mock_api_with_config(config).await?;
+ let mock_user = mock_user()?;
+ api.db.insert_user(&mock_user).await?;
+
+ let web_scraping = WebScrapingApiExt::new(&api);
+ let tracker = web_scraping
+ .create_resources_tracker(
+ mock_user.id,
+ WebPageTrackerCreateParams {
+ name: "name_one".to_string(),
+ url: Url::parse("https://secutils.dev/one")?,
+ settings: WebPageTrackerSettings {
+ revisions: 3,
+ delay: Duration::from_millis(2000),
+ enable_notifications: true,
+ schedule: Some("0 0 * * * *".to_string()),
+ scripts: Default::default(),
+ },
+ },
+ )
+ .await?;
+
+ let web_scraper_mock = server.mock(|when, then| {
+ when.method(httpmock::Method::POST)
+ .path("/api/web_page/resources")
+ .json_body(
+ serde_json::to_value(
+ WebScraperResourcesRequest::with_default_parameters(&tracker.url)
+ .set_delay(Duration::from_millis(2000)),
+ )
+ .unwrap(),
+ );
+ then.status(400)
+ .header("Content-Type", "application/json")
+ .json_body_obj(&WebScraperErrorResponse {
+ message: "some client-error".to_string(),
+ });
+ });
+
+ let scraper_error = web_scraping
+ .get_resources_tracker_history(
+ mock_user.id,
+ tracker.id,
+ WebPageResourcesTrackerGetHistoryParams {
+ refresh: true,
+ calculate_diff: false,
+ },
+ )
+ .await
+ .unwrap_err()
+            .downcast::<SecutilsError>()
+ .unwrap();
+ assert_eq!(scraper_error.status_code(), 400);
+ assert_debug_snapshot!(
+ scraper_error,
+ @r###""some client-error""###
+ );
+
+ let tracker_resources = web_scraping
+ .get_resources_tracker_history(mock_user.id, tracker.id, Default::default())
+ .await?;
+ assert!(tracker_resources.is_empty());
+ web_scraper_mock.assert();
+
+ Ok(())
+ }
+
#[tokio::test]
async fn properly_saves_web_page_content() -> anyhow::Result<()> {
let server = MockServer::start();
@@ -2782,7 +2886,7 @@ mod tests {
);
then.status(400)
.header("Content-Type", "application/json")
- .json_body_obj(&WebScraperContentError {
+ .json_body_obj(&WebScraperErrorResponse {
message: "some client-error".to_string(),
});
});
diff --git a/src/utils/web_scraping/web_page_trackers.rs b/src/utils/web_scraping/web_page_trackers.rs
index 53b9cae..3cb82d3 100644
--- a/src/utils/web_scraping/web_page_trackers.rs
+++ b/src/utils/web_scraping/web_page_trackers.rs
@@ -5,11 +5,12 @@ mod web_page_tracker;
mod web_page_tracker_kind;
mod web_page_tracker_settings;
mod web_page_tracker_tag;
+mod web_scraper;
pub use self::{
web_page_content::{
- WebPageContentTrackerTag, WebScraperContentError, WebScraperContentRequest,
- WebScraperContentRequestScripts, WebScraperContentResponse,
+ WebPageContentTrackerTag, WebScraperContentRequest, WebScraperContentRequestScripts,
+ WebScraperContentResponse,
},
web_page_data_revision::WebPageDataRevision,
web_page_resources::{
@@ -24,6 +25,7 @@ pub use self::{
WebPageTrackerSettings, MAX_WEB_PAGE_TRACKER_DELAY, MAX_WEB_PAGE_TRACKER_REVISIONS,
},
web_page_tracker_tag::WebPageTrackerTag,
+ web_scraper::WebScraperErrorResponse,
};
pub(in crate::utils::web_scraping) use self::web_page_resources::{
diff --git a/src/utils/web_scraping/web_page_trackers/web_page_content.rs b/src/utils/web_scraping/web_page_trackers/web_page_content.rs
index 22beb5e..337fa70 100644
--- a/src/utils/web_scraping/web_page_trackers/web_page_content.rs
+++ b/src/utils/web_scraping/web_page_trackers/web_page_content.rs
@@ -1,11 +1,9 @@
mod web_page_content_tracker_tag;
-mod web_scraper_content_error;
mod web_scraper_content_request;
mod web_scraper_content_response;
pub use self::{
web_page_content_tracker_tag::WebPageContentTrackerTag,
- web_scraper_content_error::WebScraperContentError,
web_scraper_content_request::{WebScraperContentRequest, WebScraperContentRequestScripts},
web_scraper_content_response::WebScraperContentResponse,
};
diff --git a/src/utils/web_scraping/web_page_trackers/web_scraper.rs b/src/utils/web_scraping/web_page_trackers/web_scraper.rs
new file mode 100644
index 0000000..996845d
--- /dev/null
+++ b/src/utils/web_scraping/web_page_trackers/web_scraper.rs
@@ -0,0 +1,3 @@
+mod web_scraper_error_response;
+
+pub use web_scraper_error_response::WebScraperErrorResponse;
diff --git a/src/utils/web_scraping/web_page_trackers/web_page_content/web_scraper_content_error.rs b/src/utils/web_scraping/web_page_trackers/web_scraper/web_scraper_error_response.rs
similarity index 71%
rename from src/utils/web_scraping/web_page_trackers/web_page_content/web_scraper_content_error.rs
rename to src/utils/web_scraping/web_page_trackers/web_scraper/web_scraper_error_response.rs
index 23d5166..26a1dda 100644
--- a/src/utils/web_scraping/web_page_trackers/web_page_content/web_scraper_content_error.rs
+++ b/src/utils/web_scraping/web_page_trackers/web_scraper/web_scraper_error_response.rs
@@ -1,29 +1,29 @@
use serde::{Deserialize, Serialize};
-/// Represents error response if scraper couldn't extract content.
+/// Represents an error returned by the web scraper service.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
-pub struct WebScraperContentError {
+pub struct WebScraperErrorResponse {
/// Error message.
pub message: String,
}
#[cfg(test)]
mod tests {
- use super::WebScraperContentError;
+ use super::WebScraperErrorResponse;
use insta::assert_json_snapshot;
#[test]
fn deserialization() -> anyhow::Result<()> {
assert_eq!(
-            serde_json::from_str::<WebScraperContentError>(
+            serde_json::from_str::<WebScraperErrorResponse>(
r#"
{
"message": "some-error"
}
"#
)?,
- WebScraperContentError {
+ WebScraperErrorResponse {
message: "some-error".to_string(),
}
);
@@ -33,7 +33,7 @@ mod tests {
#[test]
fn serialization() -> anyhow::Result<()> {
- assert_json_snapshot!(WebScraperContentError {
+ assert_json_snapshot!(WebScraperErrorResponse {
message: "some-error".to_string(),
}, @r###"
{
diff --git a/tools/api/utils/web_scraping_content.http b/tools/api/utils/web_scraping_content.http
new file mode 100644
index 0000000..d9f30de
--- /dev/null
+++ b/tools/api/utils/web_scraping_content.http
@@ -0,0 +1,15 @@
+### Create web page content tracker
+POST {{host}}/api/utils/web_scraping/content
+Authorization: {{api-credentials}}
+Accept: application/json
+Content-Type: application/json
+
+{
+ "name": "HackerNewsDemo",
+ "url": "https://news.ycombinator.com/",
+ "settings": {
+ "revisions": 1,
+ "delay": 5000,
+ "enableNotifications": true
+ }
+}