Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rdfs:label and skos:definition #150

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 48 additions & 6 deletions spec_parser/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from rdflib.namespace import DCTERMS, OWL, RDF, RDFS, SH, SKOS, XSD
from rdflib.tools.rdf2dot import rdf2dot

from .util import unmarkdown

URI_BASE = "https://spdx.org/rdf/3.0.1/terms/"


Expand Down Expand Up @@ -59,6 +61,16 @@ def gen_rdf_ontology(model):
g.add((node, RDF.type, OWL.Ontology))
g.add((node, OWL.versionIRI, node))
g.add((node, RDFS.label, Literal("System Package Data Exchange (SPDX) Ontology", lang="en")))
g.add(
(
node,
SKOS.definition,
Literal(
"This ontology defines the terms and relationships used in the SPDX specification to describe system packages",
lang="en",
),
)
)
g.add(
(
node,
Expand All @@ -74,7 +86,16 @@ def gen_rdf_ontology(model):
g.add((node, DCTERMS.license, URIRef("https://spdx.org/licenses/Community-Spec-1.0.html")))
g.add((node, DCTERMS.references, URIRef("https://spdx.dev/specifications/")))
g.add((node, DCTERMS.title, Literal("System Package Data Exchange (SPDX) Ontology", lang="en")))
g.add((node, OMG_ANN.copyright, Literal("Copyright (C) 2024 SPDX Project", lang="en")))
g.add(
(
node,
OMG_ANN.copyright,
Literal(
"Copyright (C) 2010-2024, The Linux Foundation and its Contributors, including SPDX Model contributions from OMG and its Contributors.",
lang="en",
),
)
)

gen_rdf_classes(model, g)
gen_rdf_properties(model, g)
Expand All @@ -92,8 +113,13 @@ def gen_rdf_classes(model, g):
for c in model.classes.values():
node = URIRef(c.iri)
g.add((node, RDF.type, OWL.Class))
if c.name:
g.add((node, RDFS.label, Literal(c.name)))
if c.summary:
g.add((node, RDFS.comment, Literal(c.summary, lang="en")))
g.add((node, RDFS.comment, Literal(unmarkdown(c.summary), lang="en")))
g.add((node, SKOS.definition, Literal(unmarkdown(c.summary), lang="en")))
if c.description:
g.add((node, SKOS.note, Literal(unmarkdown(c.description), lang="en")))
parent = c.metadata.get("SubclassOf")
if parent:
pns = "" if parent.startswith("/") else f"/{c.ns.name}/"
Expand Down Expand Up @@ -164,8 +190,13 @@ def gen_rdf_properties(model, g):
if fqname == "/Core/spdxId":
continue
node = URIRef(p.iri)
if p.name:
g.add((node, RDFS.label, Literal(p.name)))
if p.summary:
g.add((node, RDFS.comment, Literal(p.summary, lang="en")))
g.add((node, RDFS.comment, Literal(unmarkdown(p.summary), lang="en")))
g.add((node, SKOS.definition, Literal(unmarkdown(p.summary), lang="en")))
if p.description:
g.add((node, SKOS.note, Literal(unmarkdown(p.description), lang="en")))
if p.metadata["Nature"] == "ObjectProperty":
g.add((node, RDF.type, OWL.ObjectProperty))
# to add: g.add((node, RDFS.domain, xxx))
Expand All @@ -192,22 +223,33 @@ def gen_rdf_vocabularies(model, g):
for v in model.vocabularies.values():
node = URIRef(v.iri)
g.add((node, RDF.type, OWL.Class))
if v.name:
g.add((node, RDFS.label, Literal(v.name)))
if v.summary:
g.add((node, RDFS.comment, Literal(v.summary, lang="en")))
g.add((node, RDFS.comment, Literal(unmarkdown(v.summary), lang="en")))
g.add((node, SKOS.definition, Literal(unmarkdown(v.summary), lang="en")))
if v.description:
g.add((node, SKOS.note, Literal(unmarkdown(v.description), lang="en")))
for e, d in v.entries.items():
enode = URIRef(v.iri + "/" + e)
g.add((enode, RDF.type, OWL.NamedIndividual))
g.add((enode, RDF.type, node))
g.add((enode, RDFS.label, Literal(e)))
g.add((enode, RDFS.comment, Literal(d, lang="en")))
g.add((enode, RDFS.comment, Literal(unmarkdown(d), lang="en")))
g.add((enode, SKOS.definition, Literal(unmarkdown(d), lang="en")))


def gen_rdf_individuals(model, g):
for i in model.individuals.values():
node = URIRef(i.iri)
g.add((node, RDF.type, OWL.NamedIndividual))
if i.name:
g.add((node, RDFS.label, Literal(i.name)))
if i.summary:
g.add((node, RDFS.comment, Literal(i.summary, lang="en")))
g.add((node, RDFS.comment, Literal(unmarkdown(i.summary), lang="en")))
g.add((node, SKOS.definition, Literal(unmarkdown(i.summary), lang="en")))
if i.description:
g.add((node, SKOS.note, Literal(unmarkdown(i.description), lang="en")))
typ = i.metadata["type"]
typename = "" if typ.startswith("/") else f"/{i.ns.name}/"
typename += typ
Expand Down
64 changes: 64 additions & 0 deletions spec_parser/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# SPDX-License-Identifier: Apache-2.0

"""
This module provides utility functions.

Functions:
unmarkdown(text: str) -> str: Convert Markdown text to plain text.

Types:
ReplaceTuple: A tuple containing a compiled regex pattern and a replacement string or function.
"""

from __future__ import annotations

import re
from typing import Callable, Pattern, Tuple, Union

# A (compiled pattern, replacement) pair. The replacement is either a
# template string or a function called with each match, i.e. anything
# accepted as the ``repl`` argument of ``Pattern.sub``.
ReplaceTuple = Tuple[Pattern, Union[str, Callable[[re.Match], str]]]


def _unmarkdown_repl_text_url(match: re.Match) -> str:
    """
    Replacement function for Markdown links.

    [text](url) -> text <url>
    [url](url) -> <url>
    [text](../file.md) -> text
    [text](file.md#fragment) -> text
    [text](#fragment) -> text
    [text]() -> text

    Args:
        match (re.Match): A match whose group 1 is the link text and
            whose group 2 is the link target.

    Returns:
        str: The plain text rendering of the link.
    """
    text = match.group(1)
    url = match.group(2)

    # An empty target carries no information; keep only the text
    # instead of emitting a dangling "<>".
    if not url:
        return text

    if text.lower() == url.lower():
        return f"<{url}>"

    # Relative targets (other Markdown files, with or without a
    # "#fragment", and in-page anchors) cannot be resolved from plain
    # text, so only the link text is kept.
    path = url.partition("#")[0]
    if url.startswith((".", "#")) or path.endswith(".md"):
        return text

    return f"{text} <{url}>"


# A list of (regular expression, replacement string/function), ordered by
# the sequence in which they should be applied to a Markdown text.
_unmarkdown_rules: list[ReplaceTuple] = [
    # [text](url) replacements
    (re.compile(r"\[(.*?)\]\((.*?)\)"), _unmarkdown_repl_text_url),
    # remove code block markup; only horizontal whitespace and a single
    # newline are consumed after the fence, so that blank lines and the
    # indentation of the following line are preserved
    (re.compile(r"^```\S*[ \t]*\n?", re.MULTILINE), ""),
    # remove code inline markup
    (re.compile(r"`([^`]+)`"), r"\1"),
]


def unmarkdown(text: str) -> str:
    """
    Convert Markdown text to plain text by applying a series of
    regular expression replacements.

    Only links, fenced code block markers and inline code markers are
    handled; any other Markdown markup is left untouched.

    Args:
        text (str): The Markdown text to be converted.

    Returns:
        str: The plain text result.
    """
    for pattern, replacement in _unmarkdown_rules:
        text = pattern.sub(replacement, text)
    return text