Skip to content

Commit

Permalink
Rollup merge of #104439 - ferrocene:pa-generate-copyright, r=pnkfelix
Browse files Browse the repository at this point in the history
Add prototype to generate `COPYRIGHT` from REUSE metadata

This PR adds a prototype to generate the `COPYRIGHT` file from the metadata gathered with REUSE. There are two new tools:

* `src/tools/collect-license-metadata` invokes REUSE, parses its output and stores a concise JSON representation of the metadata in `src/etc/license-metadata.json`.
* `src/tools/generate-copyright` parses the metadata generated above, will (in the future) also gather the metadata of crate dependencies, and renders the `COPYRIGHT.md` file.

Note that since the contents of those files are currently incorrect, rather than outputting in the paths above, the files will be stored in `build/` and not committed. This will be changed once we're confident about the metadata.

Eventually, `src/etc/license-metadata.json` will be committed into the repository and verified to be up to date by CI (similar to our GitHub Actions configuration), to avoid having people install REUSE on their local machine in most cases.

You can see the (incorrect) generated files in https://gist.github.com/pietroalbini/3f3f22b6f9cc8533abf7494b6a50cf97.

r? `@pnkfelix`
  • Loading branch information
matthiaskrgr authored Dec 6, 2022
2 parents 9db224f + f8a7123 commit e5a01b9
Show file tree
Hide file tree
Showing 15 changed files with 719 additions and 0 deletions.
77 changes: 77 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ dependencies = [
"libc",
"num-integer",
"num-traits",
"serde",
"time",
"winapi",
]
Expand Down Expand Up @@ -730,6 +731,16 @@ dependencies = [
"rustc-semver",
]

[[package]]
name = "collect-license-metadata"
version = "0.1.0"
dependencies = [
"anyhow",
"serde",
"serde_json",
"spdx-rs",
]

[[package]]
name = "color-eyre"
version = "0.6.2"
Expand Down Expand Up @@ -1552,6 +1563,15 @@ dependencies = [
"termcolor",
]

[[package]]
name = "generate-copyright"
version = "0.1.0"
dependencies = [
"anyhow",
"serde",
"serde_json",
]

[[package]]
name = "generic-array"
version = "0.14.4"
Expand Down Expand Up @@ -4864,6 +4884,35 @@ dependencies = [
"winapi",
]

[[package]]
name = "spdx-expression"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d7ac03c67c572d85049d6db815e20a4a19b41b3d5cca732ac582342021ad77"
dependencies = [
"nom",
"serde",
"thiserror",
"tracing",
]

[[package]]
name = "spdx-rs"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3c02f6eb7e7b4100c272f685a9ccaccaab302324e8c7ec3e2ee72340fb29ff3"
dependencies = [
"chrono",
"log",
"nom",
"serde",
"spdx-expression",
"strum",
"strum_macros",
"thiserror",
"uuid",
]

[[package]]
name = "stable_deref_trait"
version = "1.2.0"
Expand Down Expand Up @@ -4967,6 +5016,25 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"

[[package]]
name = "strum"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"

[[package]]
name = "strum_macros"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn",
]

[[package]]
name = "syn"
version = "1.0.102"
Expand Down Expand Up @@ -5596,6 +5664,15 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8772a4ccbb4e89959023bc5b7cb8623a795caa7092d99f3aa9501b9484d4557d"

[[package]]
name = "uuid"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
dependencies = [
"getrandom 0.2.0",
]

[[package]]
name = "valuable"
version = "0.1.0"
Expand Down
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ members = [
"src/tools/bump-stage0",
"src/tools/replace-version-placeholder",
"src/tools/lld-wrapper",
"src/tools/collect-license-metadata",
"src/tools/generate-copyright",
]

exclude = [
Expand Down
10 changes: 10 additions & 0 deletions config.toml.example
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,16 @@ changelog-seen = 2
# Defaults to the Python interpreter used to execute x.py
#python = "python"

# The path to the REUSE executable to use. Note that REUSE is not required in
# most cases, as our tooling relies on a cached (and shrunk) copy of the
# REUSE output present in the git repository and in our source tarballs.
#
# REUSE is only needed if your changes caused the overall licensing of the
# repository to change, and the cached copy has to be regenerated.
#
# Defaults to the "reuse" command in the system path.
#reuse = "reuse"

# Force Cargo to check that Cargo.lock describes the precise dependency
# set that all the Cargo.toml files create, instead of updating it.
#locked-deps = false
Expand Down
2 changes: 2 additions & 0 deletions src/bootstrap/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,8 @@ impl<'a> Builder<'a> {
run::BumpStage0,
run::ReplaceVersionPlaceholder,
run::Miri,
run::CollectLicenseMetadata,
run::GenerateCopyright,
),
// These commands either don't use paths, or they're special-cased in Build::build()
Kind::Clean | Kind::Format | Kind::Setup => vec![],
Expand Down
3 changes: 3 additions & 0 deletions src/bootstrap/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ pub struct Config {
pub npm: Option<PathBuf>,
pub gdb: Option<PathBuf>,
pub python: Option<PathBuf>,
pub reuse: Option<PathBuf>,
pub cargo_native_static: bool,
pub configure_args: Vec<String>,

Expand Down Expand Up @@ -611,6 +612,7 @@ define_config! {
nodejs: Option<String> = "nodejs",
npm: Option<String> = "npm",
python: Option<String> = "python",
reuse: Option<String> = "reuse",
locked_deps: Option<bool> = "locked-deps",
vendor: Option<bool> = "vendor",
full_bootstrap: Option<bool> = "full-bootstrap",
Expand Down Expand Up @@ -1004,6 +1006,7 @@ impl Config {
config.npm = build.npm.map(PathBuf::from);
config.gdb = build.gdb.map(PathBuf::from);
config.python = build.python.map(PathBuf::from);
config.reuse = build.reuse.map(PathBuf::from);
config.submodules = build.submodules;
set(&mut config.low_priority, build.low_priority);
set(&mut config.compiler_docs, build.compiler_docs);
Expand Down
63 changes: 63 additions & 0 deletions src/bootstrap/run.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::path::PathBuf;
use std::process::Command;

use crate::builder::{Builder, RunConfig, ShouldRun, Step};
Expand Down Expand Up @@ -189,3 +190,65 @@ impl Step for Miri {
builder.run(&mut miri);
}
}

/// Bootstrap step that runs the `collect-license-metadata` tool.
///
/// The tool is handed the configured REUSE executable and a destination path
/// through the `REUSE_EXE` and `DEST` environment variables. The step yields
/// the path of the JSON file the tool was asked to write.
#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)]
pub struct CollectLicenseMetadata;

impl Step for CollectLicenseMetadata {
    type Output = PathBuf;
    const ONLY_HOSTS: bool = true;

    /// Selects this step when the tool's source directory is requested.
    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("src/tools/collect-license-metadata")
    }

    /// Schedules the step on the builder; the resulting path is not needed here.
    fn make_run(run: RunConfig<'_>) {
        run.builder.ensure(Self);
    }

    /// Invokes the tool and returns the location of the metadata file.
    ///
    /// # Panics
    ///
    /// Panics when no REUSE executable is present in the build configuration.
    fn run(self, builder: &Builder<'_>) -> Self::Output {
        let reuse_exe = match &builder.config.reuse {
            Some(path) => path,
            None => panic!("REUSE is required to collect the license metadata"),
        };

        // Temporary location, it will be moved to src/etc once it's accurate.
        let metadata_path = builder.out.join("license-metadata.json");

        let mut collector = builder.tool_cmd(Tool::CollectLicenseMetadata);
        collector.env("REUSE_EXE", reuse_exe).env("DEST", &metadata_path);
        builder.run(&mut collector);

        metadata_path
    }
}

/// Bootstrap step that runs the `generate-copyright` tool.
///
/// It first ensures [`CollectLicenseMetadata`] has run, then passes the
/// resulting metadata path and the output path to the tool through the
/// `LICENSE_METADATA` and `DEST` environment variables. The step yields the
/// path of the `COPYRIGHT.md` file the tool was asked to write.
#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)]
pub struct GenerateCopyright;

impl Step for GenerateCopyright {
    type Output = PathBuf;
    const ONLY_HOSTS: bool = true;

    /// Selects this step when the tool's source directory is requested.
    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("src/tools/generate-copyright")
    }

    /// Schedules the step on the builder; the resulting path is not needed here.
    fn make_run(run: RunConfig<'_>) {
        run.builder.ensure(Self);
    }

    /// Invokes the tool and returns the location of the rendered file.
    fn run(self, builder: &Builder<'_>) -> Self::Output {
        // The metadata step must run first: its output is this tool's input.
        let metadata_path = builder.ensure(CollectLicenseMetadata);

        // Temporary location, it will be moved to the proper one once it's accurate.
        let copyright_path = builder.out.join("COPYRIGHT.md");

        let mut generator = builder.tool_cmd(Tool::GenerateCopyright);
        generator.env("LICENSE_METADATA", &metadata_path).env("DEST", &copyright_path);
        builder.run(&mut generator);

        copyright_path
    }
}
7 changes: 7 additions & 0 deletions src/bootstrap/sanity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,13 @@ than building it.
.map(|p| cmd_finder.must_have(p))
.or_else(|| cmd_finder.maybe_have("gdb"));

build.config.reuse = build
.config
.reuse
.take()
.map(|p| cmd_finder.must_have(p))
.or_else(|| cmd_finder.maybe_have("reuse"));

// We're gonna build some custom C code here and there, host triples
// also build some C++ shims for LLVM so we need a C++ compiler.
for target in &build.targets {
Expand Down
2 changes: 2 additions & 0 deletions src/bootstrap/tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ bootstrap_tool!(
HtmlChecker, "src/tools/html-checker", "html-checker";
BumpStage0, "src/tools/bump-stage0", "bump-stage0";
ReplaceVersionPlaceholder, "src/tools/replace-version-placeholder", "replace-version-placeholder";
CollectLicenseMetadata, "src/tools/collect-license-metadata", "collect-license-metadata";
GenerateCopyright, "src/tools/generate-copyright", "generate-copyright";
);

#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
Expand Down
10 changes: 10 additions & 0 deletions src/tools/collect-license-metadata/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[package]
name = "collect-license-metadata"
version = "0.1.0"
edition = "2021"

[dependencies]
anyhow = "1.0.65"
serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.85"
spdx-rs = "0.5.1"
65 changes: 65 additions & 0 deletions src/tools/collect-license-metadata/src/licenses.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
use std::collections::HashMap;

/// Leading markers that `License::remove_copyright_prefixes` strips from the
/// start of every copyright notice. Matching is exact (case-sensitive) and
/// each marker is removed repeatedly until none of them is left at the front.
const COPYRIGHT_PREFIXES: &[&str] = &["SPDX-FileCopyrightText:", "Copyright", "(c)", "(C)", "©"];

pub(crate) struct LicensesInterner {
by_id: Vec<License>,
by_struct: HashMap<License, usize>,
}

impl LicensesInterner {
pub(crate) fn new() -> Self {
LicensesInterner { by_id: Vec::new(), by_struct: HashMap::new() }
}

pub(crate) fn intern(&mut self, mut license: License) -> LicenseId {
license.simplify();
if let Some(id) = self.by_struct.get(&license) {
LicenseId(*id)
} else {
let id = self.by_id.len();
self.by_id.push(license.clone());
self.by_struct.insert(license, id);
LicenseId(id)
}
}

pub(crate) fn resolve(&self, id: LicenseId) -> &License {
&self.by_id[id.0]
}
}

/// Handle to a license stored in a `LicensesInterner`; the wrapped value is
/// the license's index in the interner's `by_id` vector.
///
/// Marked `#[serde(transparent)]`, so it serializes as the wrapped `usize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize)]
#[serde(transparent)]
pub(crate) struct LicenseId(usize);

/// Licensing information interned by `LicensesInterner`.
///
/// Equality and hashing cover both fields, so two values are the same license
/// only if the SPDX string and the full copyright list match.
#[derive(Clone, Hash, PartialEq, Eq, serde::Serialize)]
pub(crate) struct License {
// NOTE(review): presumably the SPDX license expression reported by REUSE — confirm against the `reuse` module.
pub(crate) spdx: String,
// Copyright notices; `simplify` strips known prefixes, then sorts and deduplicates them.
pub(crate) copyright: Vec<String>,
}

impl License {
    /// Normalizes the copyright notices: strips the known prefixes from each
    /// one, then sorts the list and drops duplicate entries.
    fn simplify(&mut self) {
        self.remove_copyright_prefixes();

        let notices = &mut self.copyright;
        notices.sort();
        notices.dedup();
    }

    /// Trims every notice and removes any leading run of `COPYRIGHT_PREFIXES`
    /// markers (and the whitespace after them).
    fn remove_copyright_prefixes(&mut self) {
        for notice in self.copyright.iter_mut() {
            let mut rest = notice.trim();

            // One pass tries each prefix in order. Removing a later prefix can
            // expose an earlier one again, so repeat until a pass changes nothing.
            loop {
                let after_pass = COPYRIGHT_PREFIXES
                    .iter()
                    .fold(rest, |text, prefix| text.trim_start_matches(prefix).trim_start());
                if after_pass == rest {
                    break;
                }
                rest = after_pass;
            }

            *notice = rest.to_owned();
        }
    }
}
30 changes: 30 additions & 0 deletions src/tools/collect-license-metadata/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
mod licenses;
mod path_tree;
mod reuse;

use crate::licenses::LicensesInterner;
use anyhow::Error;
use std::path::PathBuf;

/// Entry point of the `collect-license-metadata` tool.
///
/// Reads the REUSE executable path from `REUSE_EXE` and the output path from
/// `DEST`, collects the licensing data through `crate::reuse::collect`, builds
/// and simplifies the path tree, and writes the result to `DEST` as
/// pretty-printed JSON under a top-level `"files"` key.
///
/// # Panics
///
/// Panics if either `REUSE_EXE` or `DEST` is not set in the environment.
fn main() -> Result<(), Error> {
    let reuse_exe = PathBuf::from(std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE"));
    let dest = PathBuf::from(std::env::var_os("DEST").expect("Missing DEST"));

    let mut interner = LicensesInterner::new();
    let collected = crate::reuse::collect(&reuse_exe, &mut interner)?;

    let mut tree = crate::path_tree::build(collected);
    tree.simplify();

    // Make sure the directory that will hold the output exists before writing.
    if let Some(parent) = dest.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let document = serde_json::json!({
        "files": crate::path_tree::expand_interned_licenses(tree, &interner),
    });
    let rendered = serde_json::to_vec_pretty(&document)?;
    std::fs::write(&dest, &rendered)?;

    Ok(())
}
Loading

0 comments on commit e5a01b9

Please sign in to comment.