Skip to content

Commit

Permalink
feat(sitemaps): write xml sitemaps
Browse files Browse the repository at this point in the history
  • Loading branch information
fiji-flo committed Nov 21, 2024
1 parent c20851b commit a450474
Show file tree
Hide file tree
Showing 9 changed files with 364 additions and 37 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ members = [
"crates/rari-md",
"crates/rari-doc",
"crates/rari-linter",
"crates/rari-sitemap",
"crates/rari-tools",
"crates/css-syntax",
"crates/css-syntax-types",
Expand All @@ -63,6 +64,7 @@ rari-utils = { path = "crates/rari-utils" }
rari-md = { path = "crates/rari-md" }
rari-data = { path = "crates/rari-data" }
rari-templ-func = { path = "crates/rari-templ-func" }
rari-sitemap = { path = "crates/rari-sitemap" }

tracing = "0.1"
tracing-subscriber = "0.3"
Expand Down Expand Up @@ -102,6 +104,7 @@ rari-tools.workspace = true
rari-deps.workspace = true
rari-types.workspace = true
rari-utils.workspace = true
rari-sitemap.workspace = true

serde.workspace = true
serde_json.workspace = true
Expand Down
15 changes: 5 additions & 10 deletions crates/rari-cli/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use rari_doc::pages::types::doc::Doc;
use rari_doc::reader::read_docs_parallel;
use rari_doc::search_index::build_search_index;
use rari_doc::utils::TEMPL_RECORDER_SENDER;
use rari_sitemap::Sitemaps;
use rari_tools::add_redirect::add_redirect;
use rari_tools::history::gather_history;
use rari_tools::popularities::update_popularities;
Expand Down Expand Up @@ -290,21 +291,15 @@ fn main() -> Result<(), Error> {
);
}
if !args.skip_sitemap && args.files.is_empty() && !urls.is_empty() {
let sitemaps = Sitemaps { sitemap_meta: urls };
let start = std::time::Instant::now();
let out_path = build_out_root()?;
fs::create_dir_all(out_path).unwrap();
let out_file = out_path.join("sitemap.txt");
let file = File::create(out_file).unwrap();
let mut buffed = BufWriter::new(file);
urls.sort();
for url in &urls {
buffed.write_all(url.as_bytes())?;
buffed.write_all(b"\n")?;
}
sitemaps.write_all_sitemaps(out_path)?;
println!(
"Took: {: >10.3?} to write sitemap.txt ({})",
"Took: {: >10.3?} to write sitemaps ({})",
start.elapsed(),
urls.len()
sitemaps.sitemap_meta.len()
);
}
if let Some((recorder_handler, tx)) = templ_stats {
Expand Down
102 changes: 77 additions & 25 deletions crates/rari-doc/src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ use std::io::{BufWriter, Write};
use std::iter::once;
use std::path::PathBuf;

use rari_types::globals::build_out_root;
use chrono::NaiveDateTime;
use rari_types::globals::{build_out_root, git_history};
use rari_types::locale::Locale;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use sha2::{Digest, Sha256};
Expand All @@ -26,6 +27,13 @@ use crate::pages::page::{Page, PageBuilder, PageLike};
use crate::pages::types::spa::SPA;
use crate::resolve::url_to_folder_path;

/// Per-page metadata returned by the `build_*` functions in this module, one entry per built page.
///
/// The lifetime `'a` allows `url` to borrow from the page it was taken from instead of allocating.
#[derive(Clone, Default)]
pub struct SitemapMeta<'a> {
/// URL of the built page; borrowed from the page in `build_docs`, owned in the other builders.
pub url: Cow<'a, str>,
/// Modification time looked up in the git history for the page's path; `None` when no history
/// entry exists or when the builder does not look it up (it is left at its default).
pub modified: Option<NaiveDateTime>,
/// Locale reported by the page (`page.locale()`).
pub locale: Locale,
}

/// Builds a single documentation page and writes the output to a JSON file.
///
/// This function takes a `Page` object, builds the page, and writes the resulting content
Expand Down Expand Up @@ -103,16 +111,25 @@ pub fn build_single_page(page: &Page) -> Result<(), DocError> {
///
/// # Returns
///
/// * `Result<Vec<Cow<'_, str>>, DocError>` - Returns a vector of `Cow<'_, str>` containing the URLs of the built
/// pages if successful, or a `DocError` if an error occurs during the process.
/// * `Result<Vec<SitemapMeta<'a>>, DocError>` - Returns a vector of `SitemapMeta` containing the URLs, Locales and
/// optionally the modification time of the built pages if successful, or a `DocError` if an error occurs during
/// the process.
///
/// # Errors
///
/// This function will return an error if:
/// - An error occurs while building any of the documentation pages.
pub fn build_docs(docs: &[Page]) -> Result<Vec<Cow<'_, str>>, DocError> {
pub fn build_docs<'a, 'b: 'a>(docs: &'b [Page]) -> Result<Vec<SitemapMeta<'a>>, DocError> {
docs.into_par_iter()
.map(|page| build_single_page(page).map(|_| Cow::Borrowed(page.url())))
.map(|page| {
let history = git_history().get(page.path());
let modified = history.map(|entry| entry.modified);
build_single_page(page).map(|_| SitemapMeta {
url: Cow::Borrowed(page.url()),
locale: page.locale(),
modified,
})
})
.collect()
}

Expand All @@ -123,18 +140,25 @@ pub fn build_docs(docs: &[Page]) -> Result<Vec<Cow<'_, str>>, DocError> {
///
/// # Returns
///
/// * `Result<Vec<Cow<'static, str>>, DocError>` - Returns a vector of `Cow<'static, str>` containing the URLs of
/// the built pages if successful, or a `DocError` if an error occurs during the process.
/// * `Result<Vec<SitemapMeta<'a>>, DocError>` - Returns a vector of `SitemapMeta` containing the URLs, Locales and
/// optionally the modification time of the built pages if successful, or a `DocError` if an error occurs during
/// the process.
///
/// # Errors
///
/// This function will return an error if:
/// - An error occurs while building any of the curriculum pages.
pub fn build_curriculum_pages() -> Result<Vec<Cow<'static, str>>, DocError> {
pub fn build_curriculum_pages<'a>() -> Result<Vec<SitemapMeta<'a>>, DocError> {
curriculum_files()
.by_path
.values()
.map(|page| build_single_page(page).map(|_| Cow::Owned(page.url().to_string())))
.map(|page| {
build_single_page(page).map(|_| SitemapMeta {
url: Cow::Owned(page.url().to_string()),
locale: page.locale(),
..Default::default()
})
})
.collect()
}

Expand Down Expand Up @@ -169,21 +193,28 @@ fn copy_blog_author_avatars() -> Result<(), DocError> {
///
/// # Returns
///
/// * `Result<Vec<Cow<'static, str>>, DocError>` - Returns a vector of `Cow<'static, str>` containing the URLs of
/// the built pages if successful, or a `DocError` if an error occurs during the process.
/// * `Result<Vec<SitemapMeta<'a>>, DocError>` - Returns a vector of `SitemapMeta` containing the URLs, Locales and
/// optionally the modification time of the built pages if successful, or a `DocError` if an error occurs during
/// the process.
///
/// # Errors
///
/// This function will return an error if:
/// - An error occurs while copying blog author avatars.
/// - An error occurs while building any of the blog pages.
pub fn build_blog_pages() -> Result<Vec<Cow<'static, str>>, DocError> {
pub fn build_blog_pages<'a>() -> Result<Vec<SitemapMeta<'a>>, DocError> {
copy_blog_author_avatars()?;
blog_files()
.posts
.values()
.chain(once(&SPA::from_url("/en-US/blog/").unwrap()))
.map(|page| build_single_page(page).map(|_| Cow::Owned(page.url().to_string())))
.map(|page| {
build_single_page(page).map(|_| SitemapMeta {
url: Cow::Owned(page.url().to_string()),
locale: page.locale(),
..Default::default()
})
})
.collect()
}

Expand All @@ -194,17 +225,24 @@ pub fn build_blog_pages() -> Result<Vec<Cow<'static, str>>, DocError> {
///
/// # Returns
///
/// * `Result<Vec<Cow<'static, str>>, DocError>` - Returns a vector of `Cow<'static, str>` containing the URLs of the
/// built pages if successful, or a `DocError` if an error occurs during the process.
/// * `Result<Vec<SitemapMeta<'a>>, DocError>` - Returns a vector of `SitemapMeta` containing the URLs, Locales and
/// optionally the modification time of the built pages if successful, or a `DocError` if an error occurs during
/// the process.
///
/// # Errors
///
/// This function will return an error if:
/// - An error occurs while building any of the generic pages.
pub fn build_generic_pages() -> Result<Vec<Cow<'static, str>>, DocError> {
pub fn build_generic_pages<'a>() -> Result<Vec<SitemapMeta<'a>>, DocError> {
generic_pages_files()
.values()
.map(|page| build_single_page(page).map(|_| Cow::Owned(page.url().to_string())))
.map(|page| {
build_single_page(page).map(|_| SitemapMeta {
url: Cow::Owned(page.url().to_string()),
locale: page.locale(),
..Default::default()
})
})
.collect()
}

Expand All @@ -215,17 +253,24 @@ pub fn build_generic_pages() -> Result<Vec<Cow<'static, str>>, DocError> {
///
/// # Returns
///
/// * `Result<Vec<Cow<'static, str>>, DocError>` - Returns a vector of `Cow<'static, str>` containing the URLs of
/// the built pages if successful, or a `DocError` if an error occurs during the process.
/// * `Result<Vec<SitemapMeta<'a>>, DocError>` - Returns a vector of `SitemapMeta` containing the URLs, Locales and
/// optionally the modification time of the built pages if successful, or a `DocError` if an error occurs during
/// the process.
///
/// # Errors
///
/// This function will return an error if:
/// - An error occurs while building any of the contributor spotlight pages.
pub fn build_contributor_spotlight_pages() -> Result<Vec<Cow<'static, str>>, DocError> {
pub fn build_contributor_spotlight_pages<'a>() -> Result<Vec<SitemapMeta<'a>>, DocError> {
contributor_spotlight_files()
.values()
.map(|page| build_single_page(page).map(|_| Cow::Owned(page.url().to_string())))
.map(|page| {
build_single_page(page).map(|_| SitemapMeta {
url: Cow::Owned(page.url().to_string()),
locale: page.locale(),
..Default::default()
})
})
.collect()
}

Expand All @@ -236,17 +281,24 @@ pub fn build_contributor_spotlight_pages() -> Result<Vec<Cow<'static, str>>, Doc
///
/// # Returns
///
/// * `Result<Vec<Cow<'static, str>>, DocError>` - Returns a vector of `Cow<'static, str>` containing the URLs of
/// the built SPAs if successful, or a `DocError` if an error occurs during the process.
/// * `Result<Vec<SitemapMeta<'a>>, DocError>` - Returns a vector of `SitemapMeta` containing the URLs and Locales of
/// the built SPAs (the modification time is left unset) if successful, or a `DocError` if an error occurs during
/// the process.
///
/// # Errors
///
/// This function will return an error if:
/// - An error occurs while building any of the SPAs.
pub fn build_spas() -> Result<Vec<Cow<'static, str>>, DocError> {
pub fn build_spas<'a>() -> Result<Vec<SitemapMeta<'a>>, DocError> {
SPA::all()
.iter()
.filter_map(|(slug, locale)| SPA::from_slug(slug, *locale))
.map(|page| build_single_page(&page).map(|_| Cow::Owned(page.url().to_string())))
.map(|page| {
build_single_page(&page).map(|_| SitemapMeta {
url: Cow::Owned(page.url().to_string()),
locale: page.locale(),
..Default::default()
})
})
.collect()
}
2 changes: 1 addition & 1 deletion crates/rari-doc/src/pages/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ fn build_doc(doc: &Doc) -> Result<BuiltPage, DocError> {
.unwrap_or_default()
.to_string_lossy()
.to_string();
let history = git_history().get(&doc.meta.path);
let history = git_history().get(doc.path());
let modified = history.map(|entry| entry.modified).unwrap_or_default();
let short_title = doc
.short_title()
Expand Down
6 changes: 5 additions & 1 deletion crates/rari-doc/src/pages/types/spa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,11 @@ impl SPA {
locale.as_url_str(),
"/",
build_spa.slug,
if build_spa.trailing_slash { "/" } else { "" }
if build_spa.trailing_slash && !build_spa.slug.is_empty() {
"/"
} else {
""
}
),
locale,
page_type: PageType::SPA,
Expand Down
18 changes: 18 additions & 0 deletions crates/rari-sitemap/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "rari-sitemap"
version = "0.0.21"
edition.workspace = true
authors.workspace = true
license.workspace = true
rust-version.workspace = true

[dependencies]
rari-doc.workspace = true
rari-types.workspace = true
rari-utils.workspace = true
thiserror.workspace = true
serde.workspace = true
chrono.workspace = true

quick-xml = { version = "0.37", features = ["serialize"] }
flate2 = "1"
Loading

0 comments on commit a450474

Please sign in to comment.