From 1164a4fd5a2b678950a952854c38591b62fa6435 Mon Sep 17 00:00:00 2001 From: Jens Reimann Date: Thu, 31 Oct 2024 14:41:54 +0100 Subject: [PATCH] feat: add size of documents and sbom data license --- common/src/hashing.rs | 6 ++ entity/src/lib.rs | 1 - entity/src/sbom.rs | 1 + entity/src/source_document.rs | 1 + migration/src/lib.rs | 2 + migration/src/m0000690_alter_sbom_details.rs | 72 +++++++++++++++++++ modules/fundamental/src/sbom/model/details.rs | 8 ++- modules/fundamental/src/sbom/model/mod.rs | 2 + .../src/source_document/model/mod.rs | 2 + modules/graphql/src/sbom.rs | 2 + modules/ingestor/src/graph/advisory/mod.rs | 1 + .../src/graph/sbom/clearly_defined.rs | 1 + modules/ingestor/src/graph/sbom/cyclonedx.rs | 17 +++++ modules/ingestor/src/graph/sbom/mod.rs | 5 ++ modules/ingestor/src/graph/sbom/spdx.rs | 4 +- .../src/service/sbom/clearly_defined.rs | 17 +++-- openapi.yaml | 10 +++ 17 files changed, 137 insertions(+), 15 deletions(-) create mode 100644 migration/src/m0000690_alter_sbom_details.rs diff --git a/common/src/hashing.rs b/common/src/hashing.rs index 5a5271615..283ff7966 100644 --- a/common/src/hashing.rs +++ b/common/src/hashing.rs @@ -11,6 +11,7 @@ pub struct Contexts { sha512: Context, sha384: Context, sha256: Context, + size: u64, } impl Contexts { @@ -19,6 +20,7 @@ impl Contexts { sha512: Context::new(&SHA512), sha384: Context::new(&SHA384), sha256: Context::new(&SHA256), + size: 0, } } @@ -26,6 +28,7 @@ impl Contexts { self.sha512.update(data); self.sha384.update(data); self.sha256.update(data); + self.size += data.len() as u64; } pub fn digests(&self) -> Digests { @@ -33,6 +36,7 @@ impl Contexts { sha512: self.sha512.clone().finish(), sha384: self.sha384.clone().finish(), sha256: self.sha256.clone().finish(), + size: self.size, } } @@ -41,6 +45,7 @@ impl Contexts { sha512: self.sha512.finish(), sha384: self.sha384.finish(), sha256: self.sha256.finish(), + size: self.size, } } } @@ -56,6 +61,7 @@ pub struct Digests { pub sha512: Digest, pub sha384: Digest, pub sha256: Digest, + pub size: u64, } impl Digests { diff --git a/entity/src/lib.rs b/entity/src/lib.rs index 055b2ebcb..1455478a9 100644 --- a/entity/src/lib.rs +++ b/entity/src/lib.rs @@ -26,7 +26,6 @@ pub mod sbom_node; pub mod sbom_package; pub mod sbom_package_cpe_ref; pub mod sbom_package_purl_ref; - pub mod source_document; pub mod status; pub mod version_range; diff --git a/entity/src/sbom.rs b/entity/src/sbom.rs index 81d42a116..9f5572db8 100644 --- a/entity/src/sbom.rs +++ b/entity/src/sbom.rs @@ -16,6 +16,7 @@ pub struct Model { pub published: Option, pub authors: Vec, + pub data_licenses: Vec, pub source_document_id: Option, diff --git a/entity/src/source_document.rs b/entity/src/source_document.rs index 447e9b1b6..46224d02c 100644 --- a/entity/src/source_document.rs +++ b/entity/src/source_document.rs @@ -8,6 +8,7 @@ pub struct Model { pub sha256: String, pub sha384: String, pub sha512: String, + pub size: i64, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] diff --git a/migration/src/lib.rs b/migration/src/lib.rs index 67120295a..c925deb42 100644 --- a/migration/src/lib.rs +++ b/migration/src/lib.rs @@ -85,6 +85,7 @@ mod m0000650_alter_advisory_tracking; mod m0000660_purl_id_indexes; mod m0000670_version_cmp; mod m0000680_fix_update_deprecated_advisory; +mod m0000690_alter_sbom_details; pub struct Migrator; @@ -177,6 +178,7 @@ impl MigratorTrait for Migrator { Box::new(m0000660_purl_id_indexes::Migration), Box::new(m0000670_version_cmp::Migration), Box::new(m0000680_fix_update_deprecated_advisory::Migration), + Box::new(m0000690_alter_sbom_details::Migration), ] } } diff --git a/migration/src/m0000690_alter_sbom_details.rs b/migration/src/m0000690_alter_sbom_details.rs new file mode 100644 index 000000000..5228922fa --- /dev/null +++ b/migration/src/m0000690_alter_sbom_details.rs @@ -0,0 +1,72 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .alter_table( + Table::alter() + .table(SourceDocument::Table) + .add_column( + ColumnDef::new(SourceDocument::Size) + .big_integer() + .default(0) + .to_owned(), + ) + .to_owned(), + ) + .await?; + + manager + .alter_table( + Table::alter() + .table(Sbom::Table) + .add_column( + ColumnDef::new(Sbom::DataLicenses) + .array(ColumnType::Text) + .to_owned(), + ) + .to_owned(), + ) + .await?; + + Ok(()) + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .alter_table( + Table::alter() + .table(Sbom::Table) + .drop_column(Sbom::DataLicenses) + .to_owned(), + ) + .await?; + + manager + .alter_table( + Table::alter() + .table(SourceDocument::Table) + .drop_column(SourceDocument::Size) + .to_owned(), + ) + .await?; + + Ok(()) + } +} + +#[derive(DeriveIden)] +enum SourceDocument { + Table, + Size, +} + +#[derive(DeriveIden)] +enum Sbom { + Table, + DataLicenses, +} diff --git a/modules/fundamental/src/sbom/model/details.rs b/modules/fundamental/src/sbom/model/details.rs index 9dea4a45d..8d3514f91 100644 --- a/modules/fundamental/src/sbom/model/details.rs +++ b/modules/fundamental/src/sbom/model/details.rs @@ -11,9 +11,11 @@ use sea_orm::{JoinType, ModelTrait, QueryFilter, QuerySelect, RelationTrait}; use sea_query::{Asterisk, Expr, Func, SimpleExpr}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; -use trustify_common::db::multi_model::SelectIntoMultiModel; -use trustify_common::db::VersionMatches; -use trustify_common::{cpe::CpeCompare, db::ConnectionOrTransaction, memo::Memo}; +use trustify_common::{ + cpe::CpeCompare, + db::{multi_model::SelectIntoMultiModel, ConnectionOrTransaction, VersionMatches}, + memo::Memo, +}; use trustify_entity::{ base_purl, purl_status, qualified_purl::{self}, diff --git a/modules/fundamental/src/sbom/model/mod.rs b/modules/fundamental/src/sbom/model/mod.rs index 171f840e0..9400ed905 100644 --- a/modules/fundamental/src/sbom/model/mod.rs +++ b/modules/fundamental/src/sbom/model/mod.rs @@ -24,6 +24,7 @@ pub struct SbomHead { pub document_id: String, pub labels: Labels, + pub data_licenses: Vec, #[schema(required)] #[serde(with = "time::serde::rfc3339::option")] @@ -49,6 +50,7 @@ impl SbomHead { name: sbom_node .map(|node| node.name.clone()) .unwrap_or("".to_string()), + data_licenses: sbom.data_licenses.clone(), }) } } diff --git a/modules/fundamental/src/source_document/model/mod.rs b/modules/fundamental/src/source_document/model/mod.rs index b5694e7fb..d41b9ee17 100644 --- a/modules/fundamental/src/source_document/model/mod.rs +++ b/modules/fundamental/src/source_document/model/mod.rs @@ -12,6 +12,7 @@ pub struct SourceDocument { pub sha256: String, pub sha384: String, pub sha512: String, + pub size: u64, } impl SourceDocument { @@ -23,6 +24,7 @@ impl SourceDocument { sha256: format!("sha256:{}", source_document.sha256), sha384: format!("sha384:{}", source_document.sha384), sha512: format!("sha512:{}", source_document.sha512), + size: source_document.size as u64, }) } } diff --git a/modules/graphql/src/sbom.rs b/modules/graphql/src/sbom.rs index 4eef60669..7430a339e 100644 --- a/modules/graphql/src/sbom.rs +++ b/modules/graphql/src/sbom.rs @@ -25,6 +25,7 @@ impl SbomQuery { published: sbom_context.sbom.published, authors: sbom_context.sbom.authors, source_document_id: sbom_context.sbom.source_document_id, + data_licenses: sbom_context.sbom.data_licenses, }), Ok(None) => Err(FieldError::new("SBOM not found")), Err(err) => Err(FieldError::from(err)), @@ -67,6 +68,7 @@ impl SbomQuery { published: sbom.sbom.published, authors: sbom.sbom.authors, source_document_id: sbom.sbom.source_document_id, + data_licenses: sbom.sbom.data_licenses, }) }) .collect() diff --git a/modules/ingestor/src/graph/advisory/mod.rs b/modules/ingestor/src/graph/advisory/mod.rs index e3e19695f..352dd9d46 100644 --- a/modules/ingestor/src/graph/advisory/mod.rs +++ b/modules/ingestor/src/graph/advisory/mod.rs @@ -141,6 +141,7 @@ impl Graph { sha256: Set(sha256), sha384: Set(digests.sha384.encode_hex()), sha512: Set(digests.sha512.encode_hex()), + size: Set(digests.size as i64), }; let doc = doc_model.insert(&self.connection(&tx)).await?; diff --git a/modules/ingestor/src/graph/sbom/clearly_defined.rs b/modules/ingestor/src/graph/sbom/clearly_defined.rs index caa5c137f..6f70753b4 100644 --- a/modules/ingestor/src/graph/sbom/clearly_defined.rs +++ b/modules/ingestor/src/graph/sbom/clearly_defined.rs @@ -93,6 +93,7 @@ impl Into for &Curation { name: self.coordinates.base_purl().to_string(), published: None, authors: vec!["ClearlyDefined: Community-Curated".to_string()], + data_licenses: vec![], } } } diff --git a/modules/ingestor/src/graph/sbom/cyclonedx.rs b/modules/ingestor/src/graph/sbom/cyclonedx.rs index 9b9eb111f..1600ee603 100644 --- a/modules/ingestor/src/graph/sbom/cyclonedx.rs +++ b/modules/ingestor/src/graph/sbom/cyclonedx.rs @@ -61,11 +61,28 @@ impl<'a> From> for SbomInformation { // TODO: not sure what to use instead, the version will most likely be `1`. .unwrap_or_else(|| sbom.version.to_string()); + let data_licenses = sbom + .metadata + .as_ref() + .and_then(|metadata| metadata.licenses.as_ref()) + .map(|licenses| &licenses.0) + .into_iter() + .flatten() + .map(|license| match license { + LicenseChoice::License(l) => match &l.license_identifier { + LicenseIdentifier::SpdxId(spdx) => spdx.to_string(), + LicenseIdentifier::Name(name) => name.to_string(), + }, + LicenseChoice::Expression(e) => e.to_string(), + }) + .collect(); + Self { node_id: CYCLONEDX_DOC_REF.to_string(), name, published, authors, + data_licenses, } } } diff --git a/modules/ingestor/src/graph/sbom/mod.rs b/modules/ingestor/src/graph/sbom/mod.rs index cc5744d93..ae92deeae 100644 --- a/modules/ingestor/src/graph/sbom/mod.rs +++ b/modules/ingestor/src/graph/sbom/mod.rs @@ -52,6 +52,8 @@ pub struct SbomInformation { pub name: String, pub published: Option, pub authors: Vec, + /// The licenses of the data itself, if known. + pub data_licenses: Vec, } impl From<()> for SbomInformation { @@ -116,6 +118,7 @@ impl Graph { name, published, authors, + data_licenses, } = info.into(); let connection = self.db.connection(&tx); @@ -127,6 +130,7 @@ impl Graph { sha256: Set(sha256), sha384: Set(digests.sha384.encode_hex()), sha512: Set(digests.sha512.encode_hex()), + size: Set(digests.size as i64), }; let doc = doc_model.insert(&connection).await?; @@ -142,6 +146,7 @@ impl Graph { source_document_id: Set(Some(doc.id)), labels: Set(labels.into()), + data_licenses: Set(data_licenses), }; let node_model = sbom_node::ActiveModel { diff --git a/modules/ingestor/src/graph/sbom/spdx.rs b/modules/ingestor/src/graph/sbom/spdx.rs index c57adb41b..522c2c3e8 100644 --- a/modules/ingestor/src/graph/sbom/spdx.rs +++ b/modules/ingestor/src/graph/sbom/spdx.rs @@ -13,8 +13,7 @@ use crate::{ use sbom_walker::report::{check, ReportSink}; use serde_json::Value; use spdx_rs::models::{RelationshipType, SPDX}; -use std::collections::HashMap; -use std::str::FromStr; +use std::{collections::HashMap, str::FromStr}; use time::OffsetDateTime; use tracing::instrument; use trustify_common::{cpe::Cpe, db::Transactional, purl::Purl}; @@ -44,6 +43,7 @@ impl<'a> From> for SbomInformation { .creation_info .creators .clone(), + data_licenses: vec![value.0.document_creation_information.data_license.clone()], } } } diff --git a/modules/ingestor/src/service/sbom/clearly_defined.rs b/modules/ingestor/src/service/sbom/clearly_defined.rs index 5963cbcd8..17025f46b 100644 --- a/modules/ingestor/src/service/sbom/clearly_defined.rs +++ b/modules/ingestor/src/service/sbom/clearly_defined.rs @@ -1,17 +1,15 @@ -use crate::graph::sbom::SbomInformation; -use crate::graph::Graph; -use crate::model::IngestResult; -use crate::service::Error; +use crate::{graph::sbom::SbomInformation, graph::Graph, model::IngestResult, service::Error}; use anyhow::anyhow; use hex::ToHex; use jsonpath_rust::JsonPath; use sea_orm::EntityTrait; use std::str::FromStr; -use trustify_common::hashing::Digests; -use trustify_common::id::{Id, TrySelectForId}; -use trustify_common::purl::Purl; -use trustify_entity::labels::Labels; -use trustify_entity::sbom; +use trustify_common::{ + hashing::Digests, + id::{Id, TrySelectForId}, + purl::Purl, +}; +use trustify_entity::{labels::Labels, sbom}; pub struct ClearlyDefinedLoader<'g> { graph: &'g Graph, @@ -80,6 +78,7 @@ impl<'g> ClearlyDefinedLoader<'g> { name: document_id.to_string(), published: None, authors: vec!["ClearlyDefined Definitions".to_string()], + data_licenses: vec![], }, &tx, ) diff --git a/openapi.yaml b/openapi.yaml index 3599f5ee3..d8b5e6b2f 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -3238,6 +3238,7 @@ components: - id - document_id - labels + - data_licenses - published - authors - name @@ -3246,6 +3247,10 @@ components: type: array items: type: string + data_licenses: + type: array + items: + type: string document_id: type: string id: @@ -3377,6 +3382,7 @@ components: - sha256 - sha384 - sha512 + - size properties: sha256: type: string @@ -3384,6 +3390,10 @@ components: type: string sha512: type: string + size: + type: integer + format: int64 + minimum: 0 SpdxLicenseDetails: allOf: - $ref: '#/components/schemas/SpdxLicenseSummary'