Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add prototype to generate COPYRIGHT from REUSE metadata #104439

Merged
merged 6 commits into from
Dec 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ dependencies = [
"libc",
"num-integer",
"num-traits",
"serde",
"time",
"winapi",
]
Expand Down Expand Up @@ -712,6 +713,16 @@ dependencies = [
"rustc-semver",
]

[[package]]
name = "collect-license-metadata"
version = "0.1.0"
dependencies = [
"anyhow",
"serde",
"serde_json",
"spdx-rs",
]

[[package]]
name = "color-eyre"
version = "0.6.2"
Expand Down Expand Up @@ -1487,6 +1498,15 @@ dependencies = [
"termcolor",
]

[[package]]
name = "generate-copyright"
version = "0.1.0"
dependencies = [
"anyhow",
"serde",
"serde_json",
]

[[package]]
name = "generic-array"
version = "0.14.4"
Expand Down Expand Up @@ -4628,6 +4648,35 @@ dependencies = [
"winapi",
]

[[package]]
name = "spdx-expression"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d7ac03c67c572d85049d6db815e20a4a19b41b3d5cca732ac582342021ad77"
dependencies = [
"nom",
"serde",
"thiserror",
"tracing",
]

[[package]]
name = "spdx-rs"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3c02f6eb7e7b4100c272f685a9ccaccaab302324e8c7ec3e2ee72340fb29ff3"
dependencies = [
"chrono",
"log",
"nom",
"serde",
"spdx-expression",
"strum",
"strum_macros",
"thiserror",
"uuid",
]

[[package]]
name = "stable_deref_trait"
version = "1.2.0"
Expand Down Expand Up @@ -4731,6 +4780,25 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"

[[package]]
name = "strum"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"

[[package]]
name = "strum_macros"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn",
]

[[package]]
name = "syn"
version = "1.0.102"
Expand Down Expand Up @@ -5357,6 +5425,15 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8772a4ccbb4e89959023bc5b7cb8623a795caa7092d99f3aa9501b9484d4557d"

[[package]]
name = "uuid"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
dependencies = [
"getrandom 0.2.0",
]

[[package]]
name = "valuable"
version = "0.1.0"
Expand Down
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ members = [
"src/tools/bump-stage0",
"src/tools/replace-version-placeholder",
"src/tools/lld-wrapper",
"src/tools/collect-license-metadata",
"src/tools/generate-copyright",
]

exclude = [
Expand Down
10 changes: 10 additions & 0 deletions config.toml.example
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,16 @@ changelog-seen = 2
# Defaults to the Python interpreter used to execute x.py
#python = "python"

# The path to the REUSE executable to use. Note that REUSE is not required in
# most cases, as our tooling relies on a cached (and shrunk) copy of the
# REUSE output present in the git repository and in our source tarballs.
#
# REUSE is only needed if your changes caused the overall licensing of the
# repository to change, and the cached copy has to be regenerated.
#
# Defaults to the "reuse" command in the system path.
#reuse = "reuse"

# Force Cargo to check that Cargo.lock describes the precise dependency
# set that all the Cargo.toml files create, instead of updating it.
#locked-deps = false
Expand Down
2 changes: 2 additions & 0 deletions src/bootstrap/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,8 @@ impl<'a> Builder<'a> {
run::BumpStage0,
run::ReplaceVersionPlaceholder,
run::Miri,
run::CollectLicenseMetadata,
run::GenerateCopyright,
),
// These commands either don't use paths, or they're special-cased in Build::build()
Kind::Clean | Kind::Format | Kind::Setup => vec![],
Expand Down
3 changes: 3 additions & 0 deletions src/bootstrap/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ pub struct Config {
pub npm: Option<PathBuf>,
pub gdb: Option<PathBuf>,
pub python: Option<PathBuf>,
pub reuse: Option<PathBuf>,
pub cargo_native_static: bool,
pub configure_args: Vec<String>,

Expand Down Expand Up @@ -610,6 +611,7 @@ define_config! {
nodejs: Option<String> = "nodejs",
npm: Option<String> = "npm",
python: Option<String> = "python",
reuse: Option<String> = "reuse",
locked_deps: Option<bool> = "locked-deps",
vendor: Option<bool> = "vendor",
full_bootstrap: Option<bool> = "full-bootstrap",
Expand Down Expand Up @@ -1003,6 +1005,7 @@ impl Config {
config.npm = build.npm.map(PathBuf::from);
config.gdb = build.gdb.map(PathBuf::from);
config.python = build.python.map(PathBuf::from);
config.reuse = build.reuse.map(PathBuf::from);
config.submodules = build.submodules;
set(&mut config.low_priority, build.low_priority);
set(&mut config.compiler_docs, build.compiler_docs);
Expand Down
63 changes: 63 additions & 0 deletions src/bootstrap/run.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::path::PathBuf;
use std::process::Command;

use crate::builder::{Builder, RunConfig, ShouldRun, Step};
Expand Down Expand Up @@ -189,3 +190,65 @@ impl Step for Miri {
builder.run(&mut miri);
}
}

/// Bootstrap step that runs the `collect-license-metadata` tool.
///
/// The tool receives the configured REUSE executable through `REUSE_EXE` and the
/// output location through `DEST`. The step's output is the path of the
/// `license-metadata.json` file under `builder.out`.
#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)]
pub struct CollectLicenseMetadata;

impl Step for CollectLicenseMetadata {
    type Output = PathBuf;
    const ONLY_HOSTS: bool = true;

    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("src/tools/collect-license-metadata")
    }

    fn make_run(run: RunConfig<'_>) {
        run.builder.ensure(CollectLicenseMetadata);
    }

    /// Runs the tool and returns the path of the metadata file it was asked to write.
    ///
    /// # Panics
    ///
    /// Panics when no REUSE executable is configured.
    fn run(self, builder: &Builder<'_>) -> Self::Output {
        // Bail out before preparing the tool command: without REUSE there is nothing to run.
        let reuse_exe = match builder.config.reuse.as_ref() {
            Some(path) => path,
            None => panic!("REUSE is required to collect the license metadata"),
        };

        // Provisional output path; the file is meant to live in src/etc once it's accurate.
        let metadata_path = builder.out.join("license-metadata.json");

        let mut tool = builder.tool_cmd(Tool::CollectLicenseMetadata);
        tool.env("REUSE_EXE", reuse_exe);
        tool.env("DEST", &metadata_path);
        builder.run(&mut tool);

        metadata_path
    }
}

/// Bootstrap step that runs the `generate-copyright` tool.
///
/// It first ensures [`CollectLicenseMetadata`] has run, then hands the resulting
/// metadata path to the tool through `LICENSE_METADATA` and the output location
/// through `DEST`. The step's output is the path of `COPYRIGHT.md` under `builder.out`.
#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)]
pub struct GenerateCopyright;

impl Step for GenerateCopyright {
    type Output = PathBuf;
    const ONLY_HOSTS: bool = true;

    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("src/tools/generate-copyright")
    }

    fn make_run(run: RunConfig<'_>) {
        run.builder.ensure(GenerateCopyright);
    }

    /// Runs the tool and returns the path of the copyright file it was asked to write.
    fn run(self, builder: &Builder<'_>) -> Self::Output {
        // The metadata step must complete first; its output is this tool's input.
        let metadata_path = builder.ensure(CollectLicenseMetadata);

        // Provisional output path; it will move to its proper home once it's accurate.
        let copyright_path = builder.out.join("COPYRIGHT.md");

        let mut tool = builder.tool_cmd(Tool::GenerateCopyright);
        tool.env("LICENSE_METADATA", &metadata_path);
        tool.env("DEST", &copyright_path);
        builder.run(&mut tool);

        copyright_path
    }
}
7 changes: 7 additions & 0 deletions src/bootstrap/sanity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,13 @@ than building it.
.map(|p| cmd_finder.must_have(p))
.or_else(|| cmd_finder.maybe_have("gdb"));

build.config.reuse = build
.config
.reuse
.take()
.map(|p| cmd_finder.must_have(p))
.or_else(|| cmd_finder.maybe_have("reuse"));

// We're gonna build some custom C code here and there, host triples
// also build some C++ shims for LLVM so we need a C++ compiler.
for target in &build.targets {
Expand Down
2 changes: 2 additions & 0 deletions src/bootstrap/tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ bootstrap_tool!(
HtmlChecker, "src/tools/html-checker", "html-checker";
BumpStage0, "src/tools/bump-stage0", "bump-stage0";
ReplaceVersionPlaceholder, "src/tools/replace-version-placeholder", "replace-version-placeholder";
CollectLicenseMetadata, "src/tools/collect-license-metadata", "collect-license-metadata";
GenerateCopyright, "src/tools/generate-copyright", "generate-copyright";
);

#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
Expand Down
10 changes: 10 additions & 0 deletions src/tools/collect-license-metadata/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[package]
name = "collect-license-metadata"
version = "0.1.0"
edition = "2021"

[dependencies]
anyhow = "1.0.65"
serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.85"
spdx-rs = "0.5.1"
65 changes: 65 additions & 0 deletions src/tools/collect-license-metadata/src/licenses.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
use std::collections::HashMap;

/// Leading markers that `License::remove_copyright_prefixes` strips (repeatedly, in any
/// combination) from the start of every copyright line.
const COPYRIGHT_PREFIXES: &[&str] = &["SPDX-FileCopyrightText:", "Copyright", "(c)", "(C)", "©"];

pub(crate) struct LicensesInterner {
by_id: Vec<License>,
by_struct: HashMap<License, usize>,
}

impl LicensesInterner {
pub(crate) fn new() -> Self {
LicensesInterner { by_id: Vec::new(), by_struct: HashMap::new() }
}

pub(crate) fn intern(&mut self, mut license: License) -> LicenseId {
license.simplify();
if let Some(id) = self.by_struct.get(&license) {
LicenseId(*id)
} else {
let id = self.by_id.len();
self.by_id.push(license.clone());
self.by_struct.insert(license, id);
LicenseId(id)
}
}

pub(crate) fn resolve(&self, id: LicenseId) -> &License {
&self.by_id[id.0]
}
}

/// Handle to a license stored in a `LicensesInterner`.
///
/// Wraps the index of the license inside the interner's `by_id` vector; because of
/// `#[serde(transparent)]` it serializes as that bare index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize)]
#[serde(transparent)]
pub(crate) struct LicenseId(usize);

/// Licensing information attached to one or more paths.
///
/// Hashable and comparable so it can be used as a key by the interner.
#[derive(Clone, Hash, PartialEq, Eq, serde::Serialize)]
pub(crate) struct License {
// NOTE(review): presumably an SPDX license expression — confirm against the REUSE collector.
pub(crate) spdx: String,
// Copyright lines; `simplify` strips known prefixes, then sorts and deduplicates them.
pub(crate) copyright: Vec<String>,
}

impl License {
    /// Normalizes the copyright lines so that equivalent licenses compare equal:
    /// prefixes are stripped, then the lines are sorted and duplicates dropped.
    fn simplify(&mut self) {
        self.remove_copyright_prefixes();
        let lines = &mut self.copyright;
        lines.sort();
        lines.dedup();
    }

    /// Trims every copyright line and strips any leading run of `COPYRIGHT_PREFIXES`
    /// markers (and the whitespace between them).
    fn remove_copyright_prefixes(&mut self) {
        for line in self.copyright.iter_mut() {
            let mut rest = line.trim();
            loop {
                // One full pass over all prefixes, trimming leading whitespace after each.
                let pass = COPYRIGHT_PREFIXES
                    .iter()
                    .fold(rest, |acc, prefix| acc.trim_start_matches(prefix).trim_start());
                // A pass that removed nothing means no prefix is left at the front.
                if pass == rest {
                    break;
                }
                rest = pass;
            }
            *line = rest.to_owned();
        }
    }
}
30 changes: 30 additions & 0 deletions src/tools/collect-license-metadata/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
mod licenses;
mod path_tree;
mod reuse;

use crate::licenses::LicensesInterner;
use anyhow::Error;
use std::path::PathBuf;

/// Collects license metadata through REUSE and writes it as pretty-printed JSON.
///
/// Reads the REUSE executable path from `REUSE_EXE` and the output path from `DEST`,
/// gathers the per-path licenses, builds and simplifies the path tree, and writes a
/// JSON object with a single `files` key to `DEST` (creating its parent directory).
///
/// # Panics
///
/// Panics if `REUSE_EXE` or `DEST` is not set.
fn main() -> Result<(), Error> {
    let reuse_exe = PathBuf::from(std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE"));
    let dest = PathBuf::from(std::env::var_os("DEST").expect("Missing DEST"));

    let mut interner = LicensesInterner::new();
    let collected = crate::reuse::collect(&reuse_exe, &mut interner)?;

    let mut tree = crate::path_tree::build(collected);
    tree.simplify();

    // Make sure the destination directory exists before anything is serialized or written.
    if let Some(dir) = dest.parent() {
        std::fs::create_dir_all(dir)?;
    }

    let document = serde_json::json!({
        "files": crate::path_tree::expand_interned_licenses(tree, &interner),
    });
    let bytes = serde_json::to_vec_pretty(&document)?;
    std::fs::write(&dest, &bytes)?;

    Ok(())
}
Loading