-
Notifications
You must be signed in to change notification settings - Fork 13k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rollup merge of #104439 - ferrocene:pa-generate-copyright, r=pnkfelix
Add prototype to generate `COPYRIGHT` from REUSE metadata

This PR adds a prototype to generate the `COPYRIGHT` file from the metadata gathered with REUSE. There are two new tools:

* `src/tools/collect-license-metadata` invokes REUSE, parses its output, and stores a concise JSON representation of the metadata in `src/etc/license-metadata.json`.
* `src/tools/generate-copyright` parses the metadata generated above, will in the future also gather crate dependency metadata, and renders the `COPYRIGHT.md` file.

Note that since the contents of those files are currently incorrect, rather than being written to the paths above, the files will be stored in `build/` and not committed. This will be changed once we're confident about the metadata.

Eventually, `src/etc/license-metadata.json` will be committed into the repository and verified to be up to date by CI (similar to our GitHub Actions configuration), so that in most cases people do not need to install REUSE on their local machine.

You can see the (incorrect) generated files in https://gist.github.com/pietroalbini/3f3f22b6f9cc8533abf7494b6a50cf97.

r? `@pnkfelix`
- Loading branch information
Showing
15 changed files
with
719 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
[package] | ||
name = "collect-license-metadata" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
[dependencies] | ||
anyhow = "1.0.65" | ||
serde = { version = "1.0.147", features = ["derive"] } | ||
serde_json = "1.0.85" | ||
spdx-rs = "0.5.1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
use std::collections::HashMap; | ||
|
||
/// Leading markers that `License::remove_copyright_prefixes` strips (repeatedly)
/// from the start of every copyright line.
const COPYRIGHT_PREFIXES: &[&str] = &["SPDX-FileCopyrightText:", "Copyright", "(c)", "(C)", "©"]; | ||
|
||
pub(crate) struct LicensesInterner { | ||
by_id: Vec<License>, | ||
by_struct: HashMap<License, usize>, | ||
} | ||
|
||
impl LicensesInterner { | ||
pub(crate) fn new() -> Self { | ||
LicensesInterner { by_id: Vec::new(), by_struct: HashMap::new() } | ||
} | ||
|
||
pub(crate) fn intern(&mut self, mut license: License) -> LicenseId { | ||
license.simplify(); | ||
if let Some(id) = self.by_struct.get(&license) { | ||
LicenseId(*id) | ||
} else { | ||
let id = self.by_id.len(); | ||
self.by_id.push(license.clone()); | ||
self.by_struct.insert(license, id); | ||
LicenseId(id) | ||
} | ||
} | ||
|
||
pub(crate) fn resolve(&self, id: LicenseId) -> &License { | ||
&self.by_id[id.0] | ||
} | ||
} | ||
|
||
/// Handle produced by `LicensesInterner::intern`; it wraps the index of the
/// license inside the interner and is passed back to `LicensesInterner::resolve`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize)] | ||
#[serde(transparent)] | ||
pub(crate) struct LicenseId(usize); | ||
|
||
#[derive(Clone, Hash, PartialEq, Eq, serde::Serialize)] | ||
pub(crate) struct License { | ||
pub(crate) spdx: String, | ||
pub(crate) copyright: Vec<String>, | ||
} | ||
|
||
impl License { | ||
fn simplify(&mut self) { | ||
self.remove_copyright_prefixes(); | ||
self.copyright.sort(); | ||
self.copyright.dedup(); | ||
} | ||
|
||
fn remove_copyright_prefixes(&mut self) { | ||
for copyright in &mut self.copyright { | ||
let mut stripped = copyright.trim(); | ||
let mut previous_stripped; | ||
loop { | ||
previous_stripped = stripped; | ||
for pattern in COPYRIGHT_PREFIXES { | ||
stripped = stripped.trim_start_matches(pattern).trim_start(); | ||
} | ||
if stripped == previous_stripped { | ||
break; | ||
} | ||
} | ||
*copyright = stripped.into(); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
mod licenses; | ||
mod path_tree; | ||
mod reuse; | ||
|
||
use crate::licenses::LicensesInterner; | ||
use anyhow::Error; | ||
use std::path::PathBuf; | ||
|
||
fn main() -> Result<(), Error> { | ||
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into(); | ||
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into(); | ||
|
||
let mut interner = LicensesInterner::new(); | ||
let paths = crate::reuse::collect(&reuse_exe, &mut interner)?; | ||
|
||
let mut tree = crate::path_tree::build(paths); | ||
tree.simplify(); | ||
|
||
if let Some(parent) = dest.parent() { | ||
std::fs::create_dir_all(parent)?; | ||
} | ||
std::fs::write( | ||
&dest, | ||
&serde_json::to_vec_pretty(&serde_json::json!({ | ||
"files": crate::path_tree::expand_interned_licenses(tree, &interner), | ||
}))?, | ||
)?; | ||
|
||
Ok(()) | ||
} |
Oops, something went wrong.