spdx · bact · Sep 8, 2024 · Sep 8, 2024 · Sep 8, 2024 · Sep 8, 2024
diff --git a/spec_parser/rdf.py b/spec_parser/rdf.py
@@ -17,6 +17,8 @@
 from rdflib.namespace import DCTERMS, OWL, RDF, RDFS, SH, SKOS, XSD
 from rdflib.tools.rdf2dot import rdf2dot
 
+from .util import unmarkdown
+
 URI_BASE = "https://spdx.org/rdf/3.0.1/terms/"
 
 
@@ -59,6 +61,16 @@ def gen_rdf_ontology(model):
     g.add((node, RDF.type, OWL.Ontology))
     g.add((node, OWL.versionIRI, node))
     g.add((node, RDFS.label, Literal("System Package Data Exchange (SPDX) Ontology", lang="en")))
+    g.add(
+        (
+            node,
+            SKOS.definition,
+            Literal(
+                "This ontology defines the terms and relationships used in the SPDX specification to describe system packages",
+                lang="en",
+            ),
+        )
+    )
     g.add(
         (
             node,
@@ -74,7 +86,16 @@ def gen_rdf_ontology(model):
     g.add((node, DCTERMS.license, URIRef("https://spdx.org/licenses/Community-Spec-1.0.html")))
     g.add((node, DCTERMS.references, URIRef("https://spdx.dev/specifications/")))
     g.add((node, DCTERMS.title, Literal("System Package Data Exchange (SPDX) Ontology", lang="en")))
-    g.add((node, OMG_ANN.copyright, Literal("Copyright (C) 2024 SPDX Project", lang="en")))
+    g.add(
+        (
+            node,
+            OMG_ANN.copyright,
+            Literal(
+                "Copyright (C) 2010-2024, The Linux Foundation and its Contributors, including SPDX Model contributions from OMG and its Contributors.",
+                lang="en",
+            ),
+        )
+    )
 
     gen_rdf_classes(model, g)
     gen_rdf_properties(model, g)
@@ -92,8 +113,13 @@ def gen_rdf_classes(model, g):
     for c in model.classes.values():
         node = URIRef(c.iri)
         g.add((node, RDF.type, OWL.Class))
+        if c.name:
+            g.add((node, RDFS.label, Literal(c.name)))
         if c.summary:
-            g.add((node, RDFS.comment, Literal(c.summary, lang="en")))
+            g.add((node, RDFS.comment, Literal(unmarkdown(c.summary), lang="en")))
+            g.add((node, SKOS.definition, Literal(unmarkdown(c.summary), lang="en")))
+        if c.description:
+            g.add((node, SKOS.note, Literal(unmarkdown(c.description), lang="en")))
         parent = c.metadata.get("SubclassOf")
         if parent:
             pns = "" if parent.startswith("/") else f"/{c.ns.name}/"
@@ -164,8 +190,13 @@ def gen_rdf_properties(model, g):
         if fqname == "/Core/spdxId":
             continue
         node = URIRef(p.iri)
+        if p.name:
+            g.add((node, RDFS.label, Literal(p.name)))
         if p.summary:
-            g.add((node, RDFS.comment, Literal(p.summary, lang="en")))
+            g.add((node, RDFS.comment, Literal(unmarkdown(p.summary), lang="en")))
+            g.add((node, SKOS.definition, Literal(unmarkdown(p.summary), lang="en")))
+        if p.description:
+            g.add((node, SKOS.note, Literal(unmarkdown(p.description), lang="en")))
         if p.metadata["Nature"] == "ObjectProperty":
             g.add((node, RDF.type, OWL.ObjectProperty))
         # to add: g.add((node, RDFS.domain, xxx))
@@ -192,22 +223,33 @@ def gen_rdf_vocabularies(model, g):
     for v in model.vocabularies.values():
         node = URIRef(v.iri)
         g.add((node, RDF.type, OWL.Class))
+        if v.name:
+            g.add((node, RDFS.label, Literal(v.name)))
         if v.summary:
-            g.add((node, RDFS.comment, Literal(v.summary, lang="en")))
+            g.add((node, RDFS.comment, Literal(unmarkdown(v.summary), lang="en")))
+            g.add((node, SKOS.definition, Literal(unmarkdown(v.summary), lang="en")))
+        if v.description:
+            g.add((node, SKOS.note, Literal(unmarkdown(v.description), lang="en")))
         for e, d in v.entries.items():
             enode = URIRef(v.iri + "/" + e)
             g.add((enode, RDF.type, OWL.NamedIndividual))
             g.add((enode, RDF.type, node))
             g.add((enode, RDFS.label, Literal(e)))
-            g.add((enode, RDFS.comment, Literal(d, lang="en")))
+            g.add((enode, RDFS.comment, Literal(unmarkdown(d), lang="en")))
+            g.add((enode, SKOS.definition, Literal(unmarkdown(d), lang="en")))
 
 
 def gen_rdf_individuals(model, g):
     for i in model.individuals.values():
         node = URIRef(i.iri)
         g.add((node, RDF.type, OWL.NamedIndividual))
+        if i.name:
+            g.add((node, RDFS.label, Literal(i.name)))
         if i.summary:
-            g.add((node, RDFS.comment, Literal(i.summary, lang="en")))
+            g.add((node, RDFS.comment, Literal(unmarkdown(i.summary), lang="en")))
+            g.add((node, SKOS.definition, Literal(unmarkdown(i.summary), lang="en")))
+        if i.description:
+            g.add((node, SKOS.note, Literal(unmarkdown(i.description), lang="en")))
         typ = i.metadata["type"]
         typename = "" if typ.startswith("/") else f"/{i.ns.name}/"
         typename += typ

diff --git a/spec_parser/util.py b/spec_parser/util.py
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+This module provides utility functions.
+
+Functions:
+    unmarkdown(text: str) -> str: Convert Markdown text to plain text.
+
+Types:
+    ReplaceTuple: A tuple containing a compiled regex pattern and a replacement string or function.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Callable, Pattern, Tuple, Union
+
+ReplaceTuple = Tuple[Pattern, Union[str, Callable[[re.Match], str]]]
+
+
+def _unmarkdown_repl_text_url(match: re.Match) -> str:
+    """
+    Turn one matched Markdown link into its plain-text form.
+
+    Group 1 is the link text and group 2 the target: equal text and target
+    (case-insensitive) give "<url>"; a target starting with "." or ending
+    with ".md" gives the text alone; anything else gives "text <url>".
+    """
+    label, target = (str(part) for part in match.group(1, 2))
+    if label.lower() == target.lower():
+        # The text only repeats the target, so the target is emitted once.
+        return f"<{target}>"
+    # Targets that start with "." or end in ".md" are left out of the output.
+    if target.startswith(".") or target.endswith(".md"):
+        return label
+    return f"{label} <{target}>"
+
+
+# Ordered (compiled pattern, replacement) pairs; unmarkdown() applies them
+# to a Markdown text one after another, from first to last.
+_unmarkdown_rules: list[ReplaceTuple] = [
+    # [text](url) replacements
+    (re.compile(r"\[(.*?)\]\((.*?)\)"), _unmarkdown_repl_text_url),
+    # drop code fence lines; [^\S\n] keeps the match on the fence's own line
+    (re.compile(r"^```\S*[^\S\n]*\n?", re.MULTILINE), ""),
+    # remove inline code markup, keeping the enclosed text
+    (re.compile(r"`([^`]+)`"), r"\1"),
+]
+
+
+def unmarkdown(text: str) -> str:
+    """
+    Convert Markdown text to plain text by applying the link, code fence
+    and inline code rules in order; other markup is left unchanged.
+
+    Args:
+        text (str): The Markdown text to be converted.
+
+    Returns:
+        str: The plain text result.
+    """
+    for pattern, replacement in _unmarkdown_rules:
+        text = pattern.sub(replacement, text)
+    return text