From a3e3e871d78319cd0f85dde1cee638d088442215 Mon Sep 17 00:00:00 2001 From: Andi Pieper Date: Thu, 24 Oct 2024 09:47:21 +0200 Subject: [PATCH] feature(cli): sync translated content (#24) --------- Co-authored-by: Florian Dieminger --- .cargo/config.toml | 1 + Cargo.lock | 1 + crates/rari-cli/main.rs | 25 +- crates/rari-doc/src/cached_readers.rs | 6 +- crates/rari-doc/src/pages/page.rs | 17 +- crates/rari-doc/src/pages/types/doc.rs | 6 +- crates/rari-doc/src/walker.rs | 3 +- crates/rari-tools/Cargo.toml | 1 + crates/rari-tools/src/add_redirect.rs | 2 +- crates/rari-tools/src/error.rs | 4 + crates/rari-tools/src/lib.rs | 1 + crates/rari-tools/src/move.rs | 13 +- crates/rari-tools/src/redirects.rs | 27 +- crates/rari-tools/src/remove.rs | 12 +- .../rari-tools/src/sync_translated_content.rs | 607 ++++++++++++++++++ crates/rari-tools/src/utils.rs | 56 +- crates/rari-tools/src/wikihistory.rs | 9 +- crates/rari-types/src/locale.rs | 37 +- crates/rari-types/src/settings.rs | 5 + 19 files changed, 750 insertions(+), 83 deletions(-) create mode 100644 crates/rari-tools/src/sync_translated_content.rs diff --git a/.cargo/config.toml b/.cargo/config.toml index c52a0c72..89338f60 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -2,3 +2,4 @@ TESTING_CONTENT_ROOT = { value = "tests/data/content/files", relative = true } TESTING_CONTENT_TRANSLATED_ROOT = { value = "tests/data/translated_content/files", relative = true } TESTING_CACHE_CONTENT = "0" +TESTING_READER_IGNORES_GITIGNORE = "1" diff --git a/Cargo.lock b/Cargo.lock index 3c8f7cd7..e0acbe75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2700,6 +2700,7 @@ dependencies = [ "serde", "serde_json", "serial_test", + "sha2", "thiserror", "tracing", "url", diff --git a/crates/rari-cli/main.rs b/crates/rari-cli/main.rs index 8b345c4e..096a594d 100644 --- a/crates/rari-cli/main.rs +++ b/crates/rari-cli/main.rs @@ -23,7 +23,9 @@ use rari_tools::history::gather_history; use rari_tools::popularities::update_popularities; use rari_tools::r#move::r#move; use rari_tools::remove::remove; +use rari_tools::sync_translated_content::sync_translated_content; use rari_types::globals::{build_out_root, content_root, content_translated_root, SETTINGS}; +use rari_types::locale::Locale; use rari_types::settings::Settings; use self_update::cargo_crate_version; use tabwriter::TabWriter; @@ -68,13 +70,14 @@ enum ContentSubcommand { Move(MoveArgs), Delete(DeleteArgs), AddRedirect(AddRedirectArgs), + SyncTranslatedContent(SyncTranslatedContentArgs), } #[derive(Args)] struct MoveArgs { old_slug: String, new_slug: String, - locale: Option, + locale: Option, #[arg(short = 'y', long, help = "Assume yes to all prompts")] assume_yes: bool, } @@ -82,7 +85,7 @@ struct MoveArgs { #[derive(Args)] struct DeleteArgs { slug: String, - locale: Option, + locale: Option, #[arg(short, long, default_value_t = false)] recursive: bool, #[arg(long)] @@ -97,6 +100,11 @@ struct AddRedirectArgs { to_url: String, } +#[derive(Args)] +struct SyncTranslatedContentArgs { + locales: Option>, +} + #[derive(Args)] struct UpdateArgs { #[arg(long)] @@ -349,17 +357,12 @@ fn main() -> Result<(), Error> { } Commands::Content(content_subcommand) => match content_subcommand { ContentSubcommand::Move(args) => { - r#move( - &args.old_slug, - &args.new_slug, - args.locale.as_deref(), - args.assume_yes, - )?; + r#move(&args.old_slug, &args.new_slug, args.locale, args.assume_yes)?; } ContentSubcommand::Delete(args) => { remove( &args.slug, - args.locale.as_deref(), + args.locale, args.recursive, args.redirect.as_deref(), args.assume_yes, @@ -368,6 +371,10 @@ fn main() -> Result<(), Error> { ContentSubcommand::AddRedirect(args) => { add_redirect(&args.from_url, &args.to_url)?; } + ContentSubcommand::SyncTranslatedContent(args) => { + let locales = args.locales.as_deref().unwrap_or(Locale::translated()); + sync_translated_content(locales, cli.verbose.is_present())?; + } }, Commands::Update(args) => update(args.version)?, } diff --git a/crates/rari-doc/src/cached_readers.rs b/crates/rari-doc/src/cached_readers.rs index 34c1a67f..1d6f0edc 100644 --- a/crates/rari-doc/src/cached_readers.rs +++ b/crates/rari-doc/src/cached_readers.rs @@ -318,17 +318,17 @@ pub fn read_and_cache_doc_pages() -> Result, DocError> { ) .unwrap(); if let Some(translated_root) = content_translated_root() { - let transted_docs = read_docs_parallel::(&[translated_root], None)?; + let translated_docs = read_docs_parallel::(&[translated_root], None)?; STATIC_DOC_PAGE_TRANSLATED_FILES .set( - transted_docs + translated_docs .iter() .cloned() .map(|doc| ((doc.locale(), Cow::Owned(doc.slug().to_string())), doc)) .collect(), ) .unwrap(); - docs.extend(transted_docs) + docs.extend(translated_docs) } init_translations_from_static_docs(); STATIC_DOC_PAGE_FILES_BY_PATH diff --git a/crates/rari-doc/src/pages/page.rs b/crates/rari-doc/src/pages/page.rs index 6f300f1e..0e96aed1 100644 --- a/crates/rari-doc/src/pages/page.rs +++ b/crates/rari-doc/src/pages/page.rs @@ -42,13 +42,26 @@ pub enum PageCategory { impl Page { pub fn from_url(url: &str) -> Result { - Self::from_url_with_other_locale_and_fallback(url, None) + Self::internal_from_url_with_other_locale_and_fallback(url, None, true) + } + + pub fn from_url_no_fallback(url: &str) -> Result { + Self::internal_from_url_with_other_locale_and_fallback(url, None, false) } pub fn from_url_with_other_locale_and_fallback( url: &str, locale: Option, ) -> Result { + Self::internal_from_url_with_other_locale_and_fallback(url, locale, true) + } + + fn internal_from_url_with_other_locale_and_fallback( + url: &str, + locale: Option, + fallback: bool, + ) -> Result { + let url = &url[..url.find('#').unwrap_or(url.len())]; let UrlMeta { folder_path, slug, @@ -61,7 +74,7 @@ impl Page { .ok_or(DocError::PageNotFound(url.to_string(), PageCategory::SPA)), PageCategory::Doc => { let doc = Doc::page_from_slug_path(&folder_path, locale); - if doc.is_err() && locale != Default::default() { + if doc.is_err() && locale != Default::default() && fallback { Doc::page_from_slug_path(&folder_path, Default::default()) } else { doc diff --git a/crates/rari-doc/src/pages/types/doc.rs b/crates/rari-doc/src/pages/types/doc.rs index 28262624..d8d8e725 100644 --- a/crates/rari-doc/src/pages/types/doc.rs +++ b/crates/rari-doc/src/pages/types/doc.rs @@ -35,6 +35,10 @@ use crate::utils::{ ] */ +fn is_page_type_none(page_type: &PageType) -> bool { + matches!(page_type, PageType::None) +} + #[derive(Deserialize, Serialize, Clone, Debug, Default, Validate)] #[serde(default)] pub struct FrontMatter { @@ -46,7 +50,7 @@ pub struct FrontMatter { #[serde(default, skip_serializing_if = "Vec::is_empty")] pub tags: Vec, pub slug: String, - #[serde(rename = "page-type")] + #[serde(rename = "page-type", skip_serializing_if = "is_page_type_none")] pub page_type: PageType, #[serde( deserialize_with = "t_or_vec", diff --git a/crates/rari-doc/src/walker.rs b/crates/rari-doc/src/walker.rs index a695364a..638ecf37 100644 --- a/crates/rari-doc/src/walker.rs +++ b/crates/rari-doc/src/walker.rs @@ -2,7 +2,7 @@ use std::path::Path; use ignore::types::TypesBuilder; use ignore::WalkBuilder; -use rari_types::globals::{content_root, content_translated_root}; +use rari_types::globals::{content_root, content_translated_root, settings}; pub fn walk_builder( paths: &[impl AsRef], @@ -25,6 +25,7 @@ pub fn walk_builder( } builder }; + builder.git_ignore(!settings().reader_ignores_gitignore); builder.types(types.build()?); Ok(builder) } diff --git a/crates/rari-tools/Cargo.toml b/crates/rari-tools/Cargo.toml index 759dfab1..3aff34f8 100644 --- a/crates/rari-tools/Cargo.toml +++ b/crates/rari-tools/Cargo.toml @@ -23,6 +23,7 @@ rayon.workspace = true console = "0" dialoguer = "0" csv = "1" +sha2 = "0.10" [dev-dependencies] serial_test = { version = "3", features = ["file_locks"] } diff --git a/crates/rari-tools/src/add_redirect.rs b/crates/rari-tools/src/add_redirect.rs index c21c074a..06a65bbe 100644 --- a/crates/rari-tools/src/add_redirect.rs +++ b/crates/rari-tools/src/add_redirect.rs @@ -86,7 +86,7 @@ mod test { use super::*; use crate::tests::fixtures::docs::DocFixtures; use crate::tests::fixtures::redirects::RedirectFixtures; - use crate::utils::test_utils::get_redirects_map; + use crate::utils::get_redirects_map; #[test] fn test_add_redirect() { diff --git a/crates/rari-tools/src/error.rs b/crates/rari-tools/src/error.rs index 66e0125d..3ac01671 100644 --- a/crates/rari-tools/src/error.rs +++ b/crates/rari-tools/src/error.rs @@ -12,6 +12,10 @@ pub enum ToolError { InvalidSlug(Cow<'static, str>), #[error("Invalid url: {0}")] InvalidUrl(Cow<'static, str>), + #[error("Invalid locale: {0}")] + InvalidLocale(Cow<'static, str>), + #[error("Orphaned doc exists: {0}")] + OrphanedDocExists(Cow<'static, str>), #[error("Git error: {0}")] GitError(String), diff --git a/crates/rari-tools/src/lib.rs b/crates/rari-tools/src/lib.rs index 68d182da..67e9792c 100644 --- a/crates/rari-tools/src/lib.rs +++ b/crates/rari-tools/src/lib.rs @@ -6,6 +6,7 @@ pub mod r#move; pub mod popularities; pub mod redirects; pub mod remove; +pub mod sync_translated_content; #[cfg(test)] pub mod tests; mod utils; diff --git a/crates/rari-tools/src/move.rs b/crates/rari-tools/src/move.rs index d1c66a6b..6a68b19a 100644 --- a/crates/rari-tools/src/move.rs +++ b/crates/rari-tools/src/move.rs @@ -2,7 +2,6 @@ use std::borrow::Cow; use std::ffi::OsStr; use std::fs::create_dir_all; use std::path::PathBuf; -use std::str::FromStr; use std::sync::Arc; use console::{style, Style}; @@ -25,15 +24,11 @@ use crate::wikihistory::update_wiki_history; pub fn r#move( old_slug: &str, new_slug: &str, - locale: Option<&str>, + locale: Option, assume_yes: bool, ) -> Result<(), ToolError> { validate_args(old_slug, new_slug)?; - let locale = if let Some(l) = locale { - Locale::from_str(l)? - } else { - Locale::default() - }; + let locale = locale.unwrap_or_default(); // Make a dry run to give some feedback on what would be done let green = Style::new().green(); @@ -244,7 +239,8 @@ mod test { use crate::tests::fixtures::docs::DocFixtures; use crate::tests::fixtures::redirects::RedirectFixtures; use crate::tests::fixtures::wikihistory::WikihistoryFixtures; - use crate::utils::test_utils::{check_file_existence, get_redirects_map}; + use crate::utils::get_redirects_map; + use crate::utils::test_utils::check_file_existence; fn s(s: &str) -> String { s.to_string() @@ -354,7 +350,6 @@ mod test { Locale::EnUs, false, ); - println!("result: {:?}", result); assert!(result.is_ok()); let result = result.unwrap(); assert!(result.len() == 3); diff --git a/crates/rari-tools/src/redirects.rs b/crates/rari-tools/src/redirects.rs index a8b8d32a..535f34f0 100644 --- a/crates/rari-tools/src/redirects.rs +++ b/crates/rari-tools/src/redirects.rs @@ -3,7 +3,7 @@ use std::cmp::Ordering; use std::collections::{BTreeMap, HashMap, HashSet}; use std::fs::File; use std::io::{self, BufRead, BufWriter, Write}; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::str::FromStr; use rari_doc::pages::page::{Page, PageLike}; @@ -212,10 +212,7 @@ pub fn add_redirects(locale: Locale, update_pairs: &[(String, String)]) -> Resul // Read the redirects file for the locale and populate the map. let mut pairs = HashMap::new(); - let path = root_for_locale(locale)? - .to_path_buf() - .join(locale.as_folder_str()) - .join("_redirects.txt"); + let path = redirects_path(locale)?; if let Err(e) = read_redirects_raw(&path, &mut pairs) { error!("Error reading redirects: {e}"); @@ -245,6 +242,12 @@ pub fn add_redirects(locale: Locale, update_pairs: &[(String, String)]) -> Resul Ok(()) } +/// Gets the path to the redirects file for a specific locale. +pub(crate) fn redirects_path(locale: Locale) -> Result { + let root = root_for_locale(locale)?; + Ok(root.join(locale.as_folder_str()).join("_redirects.txt")) +} + /// Validates a list of redirect pairs. /// /// Iterates through each `(from, to)` pair and validates both URLs based on the locale. @@ -318,8 +321,8 @@ fn validate_from_url(url: &str, locale: Locale) -> Result<(), ToolError> { check_url_invalid_symbols(&url)?; - // Check for existing file/folder, commented for now - if let Ok(page) = Page::from_url(&url) { + // Check for existing file/folder. + if let Ok(page) = Page::from_url_no_fallback(&url) { return Err(ToolError::InvalidRedirectFromURL(format!( "From-URL '{}' resolves to an existing folder at '{}'.", url, @@ -857,16 +860,6 @@ mod tests { assert!(result.is_err()); } - #[test] - fn test_validate_to_url_diff_locale() { - let slugs = vec!["A".to_string()]; - let _docs = DocFixtures::new(&slugs, Locale::EnUs); - let url = "/en-US/docs/A"; - let result = validate_to_url(url); - println!("{:?}", result); - assert!(result.is_ok()); - } - #[test] fn test_validate_from_url_happy_path() { let url = "/en-US/docs/A"; diff --git a/crates/rari-tools/src/remove.rs b/crates/rari-tools/src/remove.rs index cb2dff29..f906be60 100644 --- a/crates/rari-tools/src/remove.rs +++ b/crates/rari-tools/src/remove.rs @@ -2,7 +2,6 @@ use std::borrow::Cow; use std::collections::BTreeSet; use std::ffi::OsStr; use std::path::PathBuf; -use std::str::FromStr; use console::Style; use dialoguer::theme::ColorfulTheme; @@ -24,17 +23,13 @@ use crate::wikihistory::delete_from_wiki_history; pub fn remove( slug: &str, - locale: Option<&str>, + locale: Option, recursive: bool, redirect: Option<&str>, assume_yes: bool, ) -> Result<(), ToolError> { validate_args(slug)?; - let locale = if let Some(l) = locale { - Locale::from_str(l)? - } else { - Locale::default() - }; + let locale = locale.unwrap_or_default(); let green = Style::new().green(); let red = Style::new().red(); @@ -270,7 +265,8 @@ mod test { use crate::tests::fixtures::docs::DocFixtures; use crate::tests::fixtures::redirects::RedirectFixtures; use crate::tests::fixtures::wikihistory::WikihistoryFixtures; - use crate::utils::test_utils::{check_file_existence, get_redirects_map}; + use crate::utils::get_redirects_map; + use crate::utils::test_utils::check_file_existence; use crate::wikihistory::test_get_wiki_history; #[test] diff --git a/crates/rari-tools/src/sync_translated_content.rs b/crates/rari-tools/src/sync_translated_content.rs new file mode 100644 index 00000000..331bbdde --- /dev/null +++ b/crates/rari-tools/src/sync_translated_content.rs @@ -0,0 +1,607 @@ +use std::borrow::Cow; +use std::collections::HashMap; +use std::ffi::OsStr; + +use console::Style; +use rari_doc::pages::page::{Page, PageCategory, PageLike, PageWriter}; +use rari_doc::pages::types::doc::Doc; +use rari_doc::resolve::{build_url, url_to_folder_path}; +use rari_doc::utils::root_for_locale; +use rari_types::locale::Locale; +use rari_utils::concat_strs; +use sha2::{Digest, Sha256}; + +use crate::error::ToolError; +use crate::git::exec_git_with_test_fallback; +use crate::redirects::add_redirects; +use crate::utils::{get_redirects_map, read_all_doc_pages}; +use crate::wikihistory::update_wiki_history; + +pub fn sync_translated_content( + locales: &[Locale], + verbose: bool, +) -> Result, ToolError> { + validate_locales(locales)?; + + let green = Style::new().green(); + let dim = Style::new().dim(); + let bold = Style::new().bold(); + + if verbose { + println!( + "{}", + green.apply_to(format!( + "Syncing translated content for locales: {:?}. Reading documents.", + locales + )), + ); + } + + let docs = read_all_doc_pages()?; + let redirects_maps: HashMap> = locales + .iter() + .chain(std::iter::once(&Locale::EnUs)) + .map(|locale| { + ( + *locale, + get_redirects_map(*locale) + .iter() + .map(|(k, v)| (k.to_lowercase(), v.to_string())) + .collect(), + ) + }) + .collect(); + + if verbose { + let (doc_count, translated_doc_count) = + docs.iter() + .fold((0, 0), |(x, y), ((locale, _slug), _page)| { + if *locale == Locale::EnUs { + (x + 1, y) + } else { + (x, y + 1) + } + }); + println!( + "{}", + dim.apply_to(format!( + "read {} docs: {} en-Us, {} translated.", + docs.len(), + doc_count, + translated_doc_count + )) + ); + } + let results = HashMap::new(); + + let res = docs + .iter() + .filter(|&((locale, _), _doc)| locales.contains(locale)) + .fold(results, |mut results, ((locale, _), page)| { + if let Page::Doc(doc) = page { + let status = sync_translated_document(&docs, &redirects_maps, doc, verbose); + if let Ok(status) = status { + let result: &mut SyncTranslatedContentResult = + results.entry(*locale).or_default(); + result.add_status(status); + } else { + tracing::error!( + "Error syncing translated content for {} ({}): {:?}", + doc.slug(), + locale, + status + ); + } + } else { + println!("Page is not a doc: {:?}", page); + } + results + }); + + // Add redirects contained in the results to the proper locale redirects files + // and modify wiki history files if needed. + for (locale, result) in &res { + let redirect_pairs = result + .redirects + .iter() + .map(|(from, to)| (from.to_string(), to.to_string())) + .collect::>(); + add_redirects(*locale, &redirect_pairs)?; + let wiki_history_pairs = result + .wiki_history + .iter() + .map(|(from, to)| (from.to_string(), to.to_string())) + .collect::>(); + update_wiki_history(*locale, &wiki_history_pairs)?; + } + + if verbose { + for (locale, result) in &res { + println!( + "{}", + green.apply_to(bold.apply_to(format!("Results for locale {}", locale))) + ); + println!( + " {}", + green.apply_to(format!("Total of {} documents.", result.total_docs)) + ); + println!( + " {}", + green.apply_to(format!("Moved {} documents.", result.moved_docs)) + ); + println!( + " {}", + green.apply_to(format!("Renamed {} documents.", result.renamed_docs)) + ); + println!( + " {}", + green.apply_to(format!( + "Conflicting {} documents.", + result.conflicting_docs + )) + ); + println!( + " {}", + green.apply_to(format!("Orphaned {} documents", result.orphaned_docs)) + ); + println!( + " {}", + green.apply_to(format!( + "Fixed {} redirected documents.", + result.redirected_docs + )) + ); + } + } + Ok(res) +} + +#[derive(Debug, Default)] +pub struct SyncTranslatedContentResult { + pub moved_docs: usize, + pub conflicting_docs: usize, + pub orphaned_docs: usize, + pub redirected_docs: usize, + pub renamed_docs: usize, + pub total_docs: usize, + pub redirects: HashMap, + pub wiki_history: HashMap, +} + +impl SyncTranslatedContentResult { + pub fn add_status(&mut self, status: SyncTranslatedDocumentStatus) { + self.moved_docs += status.moved as usize; + self.conflicting_docs += status.conflicting as usize; + self.orphaned_docs += status.orphaned as usize; + self.redirected_docs += status.followed as usize; + self.renamed_docs += status.renamed as usize; + self.total_docs += 1; + if let Some((from, to)) = status.redirect { + self.redirects.insert(from, to); + } + if let Some((old, current)) = status.wiki_history { + self.wiki_history.insert(old, current); + } + } +} + +#[derive(Debug, Default)] +pub struct SyncTranslatedDocumentStatus { + pub redirect: Option<(String, String)>, + pub wiki_history: Option<(String, String)>, + pub conflicting: bool, + pub followed: bool, + pub moved: bool, + pub orphaned: bool, + pub renamed: bool, +} + +fn sync_translated_document( + docs: &HashMap<(Locale, Cow<'_, str>), Page>, + redirect_maps: &HashMap>, + doc: &Doc, + verbose: bool, +) -> Result { + let mut status = SyncTranslatedDocumentStatus::default(); + + let dim = Style::new().dim(); + let yellow = Style::new().yellow(); + + if doc.is_orphaned() || doc.is_conflicting() { + return Ok(status); + } + + let resolved_slug = resolve(redirect_maps, doc.slug()); + + status.renamed = doc.slug() != resolved_slug; + status.moved = status.renamed && doc.slug().to_lowercase() != resolved_slug.to_lowercase(); + + if status.moved { + status.followed = true; + } + + let mut resolved_slug = if let Some((url, _)) = resolved_slug.split_once('#') { + Cow::Borrowed(url) + } else { + resolved_slug + }; + + let resolved_doc = docs.get(&(Locale::EnUs, resolved_slug.clone())); + status.orphaned = resolved_doc.is_none(); + + if !status.renamed && !status.orphaned { + return Ok(status); + } + + if status.orphaned { + if verbose { + println!( + "{}", + yellow.apply_to(format!("orphaned: {}", doc.path().to_string_lossy())) + ); + } + status.followed = false; + status.moved = true; + resolved_slug = concat_strs!("orphaned/", &resolved_slug).into(); + let orphaned_doc = docs.get(&(doc.locale(), resolved_slug.clone())); + if orphaned_doc.is_some() { + return Err(ToolError::OrphanedDocExists(Cow::Owned(format!( + "{} → {}", + doc.slug(), + resolved_slug + )))); + } + } else if status.moved && md_exists(&resolved_slug, doc.locale())? { + if verbose { + println!( + "{}", + dim.apply_to(format!( + "unrooting {} (conflicting translation)", + doc.path().to_string_lossy() + )) + ); + } + if resolved_doc.is_some() { + // Set the slug to a /conflicting /... slug. if that already + // exists (possibly from a previous move on this run), + // append a sha256 hash of the original slug to the slug. + resolved_slug = concat_strs!("conflicting/", &resolved_slug).into(); + if md_exists(&resolved_slug, doc.locale())? { + let hash = Sha256::digest(doc.slug().as_bytes()); + let digest = format!("{:x}", hash); + resolved_slug = concat_strs!(&resolved_slug, "_", &digest).into(); + } + + status.conflicting = true; + } else { + return Err(ToolError::Unknown("Conflicting docs not found")); + } + } + + // Add entries to the redirects and wiki history maps + status.redirect = Some(( + build_url(doc.slug(), doc.locale(), PageCategory::Doc)?, + build_url(&resolved_slug, doc.locale(), PageCategory::Doc)?, + )); + status.wiki_history = Some((doc.slug().to_string(), resolved_slug.to_string())); + + // Write and then move the doc to the new location. + // Also set `original_slug` in metadata. + if status.moved { + write_and_move_doc(doc, &resolved_slug)?; + } + + Ok(status) +} + +fn write_and_move_doc(doc: &Doc, target_slug: &str) -> Result<(), ToolError> { + let source_path = doc.path(); + let target_directory = root_for_locale(doc.locale())? + .join(doc.locale().as_folder_str()) + .join(url_to_folder_path(target_slug)); + std::fs::create_dir_all(&target_directory)?; + + // Write the new slug, store the old slug in `original_slug` metadata + let mut new_doc = doc.clone(); + new_doc.meta.slug = target_slug.to_owned(); + new_doc.meta.original_slug = Some(doc.slug().to_owned()); + new_doc.write()?; + + // Move the file with git + let output = exec_git_with_test_fallback( + &[ + OsStr::new("mv"), + source_path.as_os_str(), + target_directory.as_os_str(), + ], + root_for_locale(doc.locale())?, + ); + + if !output.status.success() { + return Err(ToolError::GitError(format!( + "Failed to move files: {}", + String::from_utf8_lossy(&output.stderr) + ))); + } + + // If the source directory is empty, remove it with the fs api. + let source_directory = doc.full_path().parent().unwrap(); + if source_directory + .read_dir() + .map(|mut dir| dir.next().is_none()) + .unwrap_or(false) + { + std::fs::remove_dir(source_directory)?; + } + + Ok(()) +} + +/// Check if a markdown file exists for a given slug and locale. +fn md_exists(slug: &str, locale: Locale) -> Result { + let folder_path = root_for_locale(locale)? + .join(locale.as_folder_str()) + .join(url_to_folder_path(slug)); + let md_path = folder_path.join("index.md"); + Ok(md_path.exists()) +} + +fn resolve<'a>( + redirect_maps: &'a HashMap>, + slug: &'a str, +) -> Cow<'a, str> { + let en_us_url_lc = + concat_strs!("/", Locale::EnUs.as_folder_str(), "/docs/", slug).to_lowercase(); + // Note: Contrary to the yari original, we skip the fundamental redirects because + // those have no role to play any more in this use case. + + let resolved_url = redirect_maps + .get(&Locale::EnUs) + .expect("Redirect map for locale not loaded") + .get(&en_us_url_lc) + .unwrap_or(&en_us_url_lc); + + let page = Page::from_url(resolved_url); + if let Ok(page) = page { + if page.slug() != slug { + return Cow::Owned(page.slug().to_string()); + } + } + Cow::Borrowed(slug) +} + +fn validate_locales(locales: &[Locale]) -> Result<(), ToolError> { + if locales.is_empty() { + return Err(ToolError::InvalidLocale(Cow::Borrowed( + "Locales cannot be empty", + ))); + } + if locales.contains(&Locale::EnUs) { + return Err(ToolError::InvalidLocale(Cow::Borrowed( + "Locales cannot contain en-us", + ))); + } + Ok(()) +} + +#[cfg(test)] +use serial_test::file_serial; +#[cfg(test)] +#[file_serial(file_fixtures)] +mod test { + + use rari_types::globals::content_translated_root; + + use super::*; + use crate::redirects::{read_redirects_raw, redirects_path}; + use crate::tests::fixtures::docs::DocFixtures; + use crate::tests::fixtures::redirects::RedirectFixtures; + use crate::tests::fixtures::wikihistory::WikihistoryFixtures; + use crate::wikihistory::read_wiki_history; + + #[test] + fn test_valid_sync_locales() { + let result = validate_locales(&[Locale::PtBr, Locale::ZhCn, Locale::Ru]); + assert!(result.is_ok()); + let result = validate_locales(&[]); + assert!(result.is_err()); + let result = validate_locales(&[Locale::EnUs, Locale::PtBr]); + assert!(result.is_err()); + } + + #[test] + fn test_sync_translated_content_no_changes() { + let en_slugs = vec![ + "Web/API/Other".to_string(), + "Web/API/ExampleOne".to_string(), + "Web/API/ExampleTwo".to_string(), + "Web/API/SomethingElse".to_string(), + ]; + let en_redirects = vec![( + "docs/Web/API/OldExampleOne".to_string(), + "docs/Web/API/ExampleOne".to_string(), + )]; + let _en_docs = DocFixtures::new(&en_slugs, Locale::EnUs); + let _en_redirects = RedirectFixtures::new(&en_redirects, Locale::EnUs); + let _en_wikihistory = WikihistoryFixtures::new(&en_slugs, Locale::EnUs); + + let es_slugs = vec![ + "Web/API/Other".to_string(), + "Web/API/ExampleOne".to_string(), + "Web/API/ExampleTwo".to_string(), + "Web/API/SomethingElse".to_string(), + ]; + let es_redirects = vec![( + "docs/Web/API/OldExampleOne".to_string(), + "docs/Web/API/ExampleOne".to_string(), + )]; + let _es_docs = DocFixtures::new(&es_slugs, Locale::Es); + let _es_redirects = RedirectFixtures::new(&es_redirects, Locale::Es); + let _es_wikihistory = WikihistoryFixtures::new(&es_slugs, Locale::Es); + + let result = sync_translated_content(&[Locale::Es], false); + assert!(result.is_ok()); + let result = result.unwrap(); + assert_eq!(result.len(), 1); + + let es_result = result.get(&Locale::Es); + assert!(es_result.is_some()); + let es_result = es_result.unwrap(); + assert_eq!(es_result.total_docs, 6); + assert_eq!(es_result.moved_docs, 0); + assert_eq!(es_result.conflicting_docs, 0); + assert_eq!(es_result.orphaned_docs, 0); + assert_eq!(es_result.redirected_docs, 0); + assert_eq!(es_result.renamed_docs, 0); + assert_eq!(es_result.redirects.len(), 0); + } + + #[test] + fn test_sync_translated_content_orphaned() { + let en_slugs = vec![ + "Web/API/ExampleOne".to_string(), + "Web/API/ExampleTwo".to_string(), + "Web/API/SomethingElse".to_string(), + ]; + let en_redirects = vec![( + "docs/Web/API/OldExampleOne".to_string(), + "docs/Web/API/ExampleOne".to_string(), + )]; + let _en_docs = DocFixtures::new(&en_slugs, Locale::EnUs); + let _en_redirects = RedirectFixtures::new(&en_redirects, Locale::EnUs); + let _en_wikihistory = WikihistoryFixtures::new(&en_slugs, Locale::EnUs); + + let es_slugs = vec![ + "Web/API/Other".to_string(), + "Web/API/ExampleOne".to_string(), + "Web/API/ExampleTwo".to_string(), + "Web/API/SomethingElse".to_string(), + ]; + let es_redirects = vec![( + "docs/Web/API/OldExampleOne".to_string(), + "docs/Web/API/ExampleOne".to_string(), + )]; + let _es_docs = DocFixtures::new(&es_slugs, Locale::Es); + let _es_redirects = RedirectFixtures::new(&es_redirects, Locale::Es); + let _es_wikihistory = WikihistoryFixtures::new(&es_slugs, Locale::Es); + + let result = sync_translated_content(&[Locale::Es], false); + assert!(result.is_ok()); + let result = result.unwrap(); + assert_eq!(result.len(), 1); + + let es_result = result.get(&Locale::Es); + assert!(es_result.is_some()); + let es_result = es_result.unwrap(); + assert_eq!(es_result.total_docs, 6); + assert_eq!(es_result.moved_docs, 1); + assert_eq!(es_result.conflicting_docs, 0); + assert_eq!(es_result.orphaned_docs, 1); + assert_eq!(es_result.redirected_docs, 0); + assert_eq!(es_result.renamed_docs, 0); + assert_eq!(es_result.redirects.len(), 1); + + let translated_root = content_translated_root().unwrap(); + let orphaned_original_path = translated_root + .join(Locale::Es.as_folder_str()) + .join("web") + .join("api") + .join("other") + .join("index.md"); + assert!(!orphaned_original_path.exists()); + let orphaned_path = translated_root + .join(Locale::Es.as_folder_str()) + .join("orphaned") + .join("web") + .join("api") + .join("other") + .join("index.md"); + assert!(orphaned_path.exists()); + + let mut redirects = HashMap::new(); + read_redirects_raw( + redirects_path(Locale::Es).unwrap().as_path(), + &mut redirects, + ) + .unwrap(); + assert_eq!(redirects.len(), 2); + assert_eq!( + redirects.get("/es/docs/Web/API/Other").unwrap(), + "/es/docs/orphaned/Web/API/Other" + ); + + let wiki_history = read_wiki_history(Locale::Es).unwrap(); + assert!(wiki_history.contains_key("orphaned/Web/API/Other")); + } + + #[test] + fn test_sync_translated_content_moved() { + let en_slugs = vec![ + "Web/API/OtherMoved".to_string(), + "Web/API/ExampleOne".to_string(), + ]; + let en_redirects = vec![( + "docs/Web/API/Other".to_string(), + "docs/Web/API/OtherMoved".to_string(), + )]; + let _en_docs = DocFixtures::new(&en_slugs, Locale::EnUs); + let _en_redirects = RedirectFixtures::new(&en_redirects, Locale::EnUs); + let _en_wikihistory = WikihistoryFixtures::new(&en_slugs, Locale::EnUs); + + let es_slugs = vec![ + "Web/API/Other".to_string(), + "Web/API/ExampleOne".to_string(), + ]; + let es_redirects = vec![]; + let _es_docs = DocFixtures::new(&es_slugs, Locale::Es); + let _es_redirects = RedirectFixtures::new(&es_redirects, Locale::Es); + let _es_wikihistory = WikihistoryFixtures::new(&es_slugs, Locale::Es); + + let result = sync_translated_content(&[Locale::Es], false); + assert!(result.is_ok()); + let result = result.unwrap(); + assert_eq!(result.len(), 1); + + let es_result = result.get(&Locale::Es); + assert!(es_result.is_some()); + let es_result = es_result.unwrap(); + assert_eq!(es_result.total_docs, 4); + assert_eq!(es_result.moved_docs, 1); + assert_eq!(es_result.conflicting_docs, 0); + assert_eq!(es_result.orphaned_docs, 0); + assert_eq!(es_result.redirected_docs, 1); + assert_eq!(es_result.renamed_docs, 1); + assert_eq!(es_result.redirects.len(), 1); + + let translated_root = content_translated_root().unwrap(); + let moved_original_path = translated_root + .join(Locale::Es.as_folder_str()) + .join("web") + .join("api") + .join("other") + .join("index.md"); + assert!(!moved_original_path.exists()); + let moved_path = translated_root + .join(Locale::Es.as_folder_str()) + .join("web") + .join("api") + .join("othermoved") + .join("index.md"); + assert!(moved_path.exists()); + + let mut redirects = HashMap::new(); + read_redirects_raw( + redirects_path(Locale::Es).unwrap().as_path(), + &mut redirects, + ) + .unwrap(); + assert_eq!(redirects.len(), 1); + assert_eq!( + redirects.get("/es/docs/Web/API/Other").unwrap(), + "/es/docs/Web/API/OtherMoved" + ); + + let wiki_history = read_wiki_history(Locale::Es).unwrap(); + assert!(wiki_history.contains_key("Web/API/OtherMoved")); + } +} diff --git a/crates/rari-tools/src/utils.rs b/crates/rari-tools/src/utils.rs index 71814051..dd7ad558 100644 --- a/crates/rari-tools/src/utils.rs +++ b/crates/rari-tools/src/utils.rs @@ -1,6 +1,15 @@ use std::borrow::Cow; +use std::collections::HashMap; + +use rari_doc::error::DocError; +use rari_doc::pages::page::{Page, PageLike}; +use rari_doc::pages::types::doc::Doc; +use rari_doc::reader::read_docs_parallel; +use rari_types::globals::{content_root, content_translated_root}; +use rari_types::locale::Locale; use crate::error::ToolError; +use crate::redirects::{self, redirects_path}; pub(crate) fn parent_slug(slug: &str) -> Result<&str, ToolError> { let slug = slug.trim_end_matches('/'); @@ -11,15 +20,39 @@ pub(crate) fn parent_slug(slug: &str) -> Result<&str, ToolError> { } } +/// Read all en-US and translated documents into a hash, with a key of `(locale, slug)`. +/// This is similar to the `cached_reader` functionality, but not wrapped in a `onceLock`. +pub(crate) fn read_all_doc_pages() -> Result), Page>, DocError> { + let docs = read_docs_parallel::(&[content_root()], None)?; + let mut docs_hash: HashMap<(Locale, Cow<'_, str>), Page> = docs + .iter() + .cloned() + .map(|doc| ((doc.locale(), Cow::Owned(doc.slug().to_string())), doc)) + .collect(); + + if let Some(translated_root) = content_translated_root() { + let translated_docs = read_docs_parallel::(&[translated_root], None)?; + docs_hash.extend( + translated_docs + .iter() + .cloned() + .map(|doc| ((doc.locale(), Cow::Owned(doc.slug().to_string())), doc)), + ) + } + Ok(docs_hash) +} + +pub(crate) fn get_redirects_map(locale: Locale) -> HashMap { + let redirects_path = redirects_path(locale).unwrap(); + let mut redirects = HashMap::new(); + redirects::read_redirects_raw(&redirects_path, &mut redirects).unwrap(); + redirects +} + #[cfg(test)] pub mod test_utils { - use std::collections::HashMap; - use std::path::{Path, PathBuf}; - - use rari_doc::utils::root_for_locale; - use rari_types::locale::Locale; + use std::path::Path; - use crate::redirects; pub(crate) fn check_file_existence( root: &Path, should_exist: &[&str], @@ -45,15 +78,4 @@ pub mod test_utils { assert!(!path.exists(), "{} should not exist", path.display()); } } - - pub(crate) fn get_redirects_map(locale: Locale) -> HashMap { - let root_path = root_for_locale(locale).unwrap(); - - let mut redirects_path = PathBuf::from(root_path); - redirects_path.push(locale.as_folder_str()); - redirects_path.push("_redirects.txt"); - let mut redirects = HashMap::new(); - redirects::read_redirects_raw(&redirects_path, &mut redirects).unwrap(); - redirects - } } diff --git a/crates/rari-tools/src/wikihistory.rs b/crates/rari-tools/src/wikihistory.rs index eec4d36e..1aaa8bd9 100644 --- a/crates/rari-tools/src/wikihistory.rs +++ b/crates/rari-tools/src/wikihistory.rs @@ -9,7 +9,10 @@ use serde_json::Value; use crate::error::ToolError; -pub fn update_wiki_history(locale: Locale, pairs: &Vec<(String, String)>) -> Result<(), ToolError> { +pub(crate) fn update_wiki_history( + locale: Locale, + pairs: &[(String, String)], +) -> Result<(), ToolError> { let mut all = read_wiki_history(locale)?; for (old_slug, new_slug) in pairs { if let Some(to) = all.remove(old_slug) { @@ -20,7 +23,7 @@ pub fn update_wiki_history(locale: Locale, pairs: &Vec<(String, String)>) -> Res Ok(()) } -pub fn delete_from_wiki_history(locale: Locale, slugs: &Vec) -> Result<(), ToolError> { +pub(crate) fn delete_from_wiki_history(locale: Locale, slugs: &[String]) -> Result<(), ToolError> { let mut all = read_wiki_history(locale)?; for slug in slugs { all.remove(slug); @@ -40,7 +43,7 @@ fn write_wiki_history(locale: Locale, all: BTreeMap) -> Result<() Ok(()) } -fn read_wiki_history(locale: Locale) -> Result, ToolError> { +pub(crate) fn read_wiki_history(locale: Locale) -> Result, ToolError> { let wiki_history_path = wiki_history_path(locale)?; // Read the content of the JSON file let wiki_history_content = fs::read_to_string(&wiki_history_path)?; diff --git a/crates/rari-types/src/locale.rs b/crates/rari-types/src/locale.rs index 498355cc..eb2ced7a 100644 --- a/crates/rari-types/src/locale.rs +++ b/crates/rari-types/src/locale.rs @@ -1,4 +1,5 @@ use std::fmt::Display; +use std::iter::once; use std::str::FromStr; use std::sync::LazyLock; @@ -93,19 +94,27 @@ impl Display for Locale { } } +static ACTIVE_TRANSLATED_LOCALES: &[Locale] = &[ + Locale::Es, + Locale::Fr, + Locale::Ja, + Locale::Ko, + Locale::PtBr, + Locale::Ru, + Locale::ZhCn, + Locale::ZhTw, +]; + static LOCALES_FOR_GENERICS_AND_SPAS: LazyLock> = LazyLock::new(|| { - let default_locales = [ - Locale::EnUs, - Locale::Es, - Locale::Fr, - Locale::Ja, - Locale::Ko, - Locale::PtBr, - Locale::Ru, - Locale::ZhCn, - Locale::ZhTw, - ]; - default_locales + once(&Locale::EnUs) + .chain(ACTIVE_TRANSLATED_LOCALES.iter()) + .chain(settings().additional_locales_for_generics_and_spas.iter()) + .map(ToOwned::to_owned) + .collect::>() +}); + +static TRANSLATED_LOCALES: LazyLock> = LazyLock::new(|| { + ACTIVE_TRANSLATED_LOCALES .iter() .chain(settings().additional_locales_for_generics_and_spas.iter()) .map(ToOwned::to_owned) @@ -144,6 +153,10 @@ impl Locale { &LOCALES_FOR_GENERICS_AND_SPAS } } + + pub fn translated() -> &'static [Self] { + &TRANSLATED_LOCALES + } } impl FromStr for Locale { diff --git a/crates/rari-types/src/settings.rs b/crates/rari-types/src/settings.rs index 0dfa687b..8a9ffb9a 100644 --- a/crates/rari-types/src/settings.rs +++ b/crates/rari-types/src/settings.rs @@ -22,6 +22,7 @@ pub struct Settings { pub legacy_live_samples_base_url: String, pub interactive_examples_base_url: String, pub additional_locales_for_generics_and_spas: Vec, + pub reader_ignores_gitignore: bool, } impl Settings { @@ -57,6 +58,10 @@ impl Settings { "CACHE_CONTENT", std::env::var("TESTING_CACHE_CONTENT").unwrap(), ); + std::env::set_var( + "READER_IGNORES_GITIGNORE", + std::env::var("TESTING_READER_IGNORES_GITIGNORE").unwrap(), + ); Self::new_internal() } #[cfg(not(feature = "testing"))]