Skip to content

Commit

Permalink
Recalculate synced bookmark frecencies.
Browse files Browse the repository at this point in the history
This commit recalculates stale frecencies for synced bookmark URLs,
chunking them to allow transactions on the main connection to interrupt
frecency calculation.

This commit also changes `score_recent_visits` to use the correct
timestamp unit: `now()` and `v.visit_date` are in milliseconds, not
microseconds as on Desktop.

Closes #847.
  • Loading branch information
linabutler committed Apr 23, 2019
1 parent bdcbf0b commit 8f0cb16
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 13 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@

[Full Changelog](https://github.com/mozilla/application-services/compare/v0.27.0...master)

## Places

### What's New

- Frecencies are now recalculated for bookmarked URLs after a sync.
([#847](https://github.com/mozilla/application-services/issues/847))

# v0.27.0 (_2019-04-22_)

[Full Changelog](https://github.com/mozilla/application-services/compare/v0.26.2...v0.27.0)
Expand Down
108 changes: 108 additions & 0 deletions components/places/src/bookmark_sync/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use super::{SyncedBookmarkKind, SyncedBookmarkValidity};
use crate::api::places_api::ConnectionType;
use crate::db::{PlacesDb, PlacesTransaction};
use crate::error::*;
use crate::frecency::{calculate_frecency, DEFAULT_FRECENCY_SETTINGS};
use crate::storage::{bookmarks::BookmarkRootGuid, delete_meta, get_meta, put_meta};
use crate::types::{BookmarkType, SyncGuid, SyncStatus, Timestamp};
use dogear::{
Expand All @@ -33,6 +34,12 @@ pub const LAST_SYNC_META_KEY: &str = "bookmarks_last_sync_time";
const GLOBAL_SYNCID_META_KEY: &str = "bookmarks_global_sync_id";
const COLLECTION_SYNCID_META_KEY: &str = "bookmarks_sync_id";

/// The maximum number of URLs for which to recalculate frecencies at once.
/// This is a trade-off between write efficiency and transaction time: higher
/// maximums mean fewer write statements, but longer transactions, possibly
/// blocking writes from other connections.
const MAX_FRECENCIES_TO_RECALCULATE_PER_CHUNK: usize = 400;

pub struct BookmarksStore<'a> {
pub db: &'a PlacesDb,
interruptee: &'a SqlInterruptScope,
Expand Down Expand Up @@ -425,6 +432,79 @@ impl<'a> BookmarksStore<'a> {
Ok(())
}

fn update_frecencies(&self) -> Result<()> {
let mut tx = self.db.begin_transaction()?;

let mut frecencies = Vec::with_capacity(MAX_FRECENCIES_TO_RECALCULATE_PER_CHUNK);
loop {
let sql = format!(
"SELECT place_id FROM moz_places_stale_frecencies
ORDER BY stale_at DESC
LIMIT {}",
MAX_FRECENCIES_TO_RECALCULATE_PER_CHUNK
);
let mut stmt = self.db.prepare_maybe_cached(&sql, true)?;
let mut results = stmt.query(NO_PARAMS)?;
while let Some(row) = results.next()? {
let place_id = row.get("place_id")?;
// Frecency recalculation runs several statements, so check to
// make sure we aren't interrupted before each calculation.
self.interruptee.err_if_interrupted()?;
let frecency = calculate_frecency(
&self.db,
&DEFAULT_FRECENCY_SETTINGS,
place_id,
Some(false),
)?;
frecencies.push((place_id, frecency));
}
if frecencies.is_empty() {
break;
}

// Update all frecencies in one fell swoop...
self.db.execute_batch(&format!(
"WITH frecencies(id, frecency) AS (
VALUES {}
)
UPDATE moz_places SET
frecency = (SELECT frecency FROM frecencies f
WHERE f.id = id)
WHERE id IN (SELECT f.id FROM frecencies f)",
sql_support::repeat_display(frecencies.len(), ",", |index, f| {
let (id, frecency) = frecencies[index];
write!(f, "({}, {})", id, frecency)
})
))?;
tx.maybe_commit()?;
self.interruptee.err_if_interrupted()?;

// ...And remove them from the stale table.
self.db.execute_batch(&format!(
"DELETE FROM moz_places_stale_frecencies
WHERE place_id IN ({})",
sql_support::repeat_display(frecencies.len(), ",", |index, f| {
let (id, _) = frecencies[index];
write!(f, "{}", id)
})
))?;
tx.maybe_commit()?;
self.interruptee.err_if_interrupted()?;

// If the query returned fewer URLs than the maximum, we're done.
// Otherwise, we might have more, so clear the ones we just
// recalculated and fetch the next chunk.
if frecencies.len() < MAX_FRECENCIES_TO_RECALCULATE_PER_CHUNK {
break;
}
frecencies.clear();
}

tx.commit()?;

Ok(())
}

pub fn sync(
&self,
storage_init: &Sync15StorageClientInit,
Expand Down Expand Up @@ -485,6 +565,7 @@ impl<'a> Store for BookmarksStore<'a> {
records_synced: Vec<String>,
) -> result::Result<(), failure::Error> {
self.push_synced_items(new_timestamp, records_synced)?;
self.update_frecencies()?;
Ok(())
}

Expand Down Expand Up @@ -1294,6 +1375,33 @@ mod tests {
.is_some(),
"Should mark frecency for new URL as stale"
);

let syncer = api
.open_sync_connection()
.expect("Should return Sync connection");
let interrupt_scope = syncer.begin_interrupt_scope();
let store = BookmarksStore::new(&syncer, &interrupt_scope);

store.update_frecencies().expect("Should update frecencies");

assert!(
frecency_stale_at(&reader, &Url::parse("http://example.com").unwrap())
.expect("Should check stale frecency")
.is_none(),
"Should recalculate frecency for first bookmark"
);
assert!(
frecency_stale_at(&reader, &Url::parse("http://example.com/2").unwrap())
.expect("Should check stale frecency for old URL")
.is_none(),
"Should recalculate frecency for old URL"
);
assert!(
frecency_stale_at(&reader, &Url::parse("http://example.com/2-remote").unwrap())
.expect("Should check stale frecency for new URL")
.is_none(),
"Should recalculate frecency for new URL"
);
}

#[test]
Expand Down
26 changes: 13 additions & 13 deletions components/places/src/frecency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,19 +170,19 @@ impl<'db, 's> FrecencyComputation<'db, 's> {
// In case the visit is a redirect target, calculate the frecency
// as if the original page was visited.
// If it's a redirect source, we may want to use a lower bonus.
let get_recent_visits = format!("
SELECT
IFNULL(origin.visit_type, v.visit_type) AS visit_type,
target.visit_type AS target_visit_type,
ROUND((strftime('%s','now','localtime','utc') - v.visit_date/1000000)/86400) AS age_in_days
FROM moz_historyvisits v
LEFT JOIN moz_historyvisits origin ON origin.id = v.from_visit
AND v.visit_type BETWEEN {redirect_permanent} AND {redirect_temporary}
LEFT JOIN moz_historyvisits target ON v.id = target.from_visit
AND target.visit_type BETWEEN {redirect_permanent} AND {redirect_temporary}
WHERE v.place_id = :page_id
ORDER BY v.visit_date DESC
LIMIT {max_visits}",
let get_recent_visits = format!(
"SELECT
IFNULL(origin.visit_type, v.visit_type) AS visit_type,
target.visit_type AS target_visit_type,
ROUND((now() - v.visit_date)/86400000) AS age_in_days
FROM moz_historyvisits v
LEFT JOIN moz_historyvisits origin ON origin.id = v.from_visit
AND v.visit_type BETWEEN {redirect_permanent} AND {redirect_temporary}
LEFT JOIN moz_historyvisits target ON v.id = target.from_visit
AND target.visit_type BETWEEN {redirect_permanent} AND {redirect_temporary}
WHERE v.place_id = :page_id
ORDER BY v.visit_date DESC
LIMIT {max_visits}",
redirect_permanent = VisitTransition::RedirectPermanent as u8,
redirect_temporary = VisitTransition::RedirectTemporary as u8,
max_visits = self.settings.num_visits,
Expand Down

0 comments on commit 8f0cb16

Please sign in to comment.