change type for blog posts to BlogPost

front-matter · Feb 5, 2025 · 832400e
1 parent 8ed82ba
commit 832400e
Show file tree

Hide file tree

Showing 24 changed files with 92 additions and 75 deletions.
diff --git a/commonmeta/__init__.py b/commonmeta/__init__.py
@@ -10,7 +10,7 @@
 """
 
 __title__ = "commonmeta-py"
-__version__ = "0.73"
+__version__ = "0.74"
 __author__ = "Martin Fenner"
 __license__ = "MIT"
 

diff --git a/commonmeta/constants.py b/commonmeta/constants.py
@@ -51,6 +51,7 @@ class Commonmeta(TypedDict):
 
 CM_TO_BIB_TRANSLATIONS = {
     "Article": "article",
+    "BlogPost": "article",
     "Book": "book",
     "BookChapter": "inbook",
     "Dissertation": "phdthesis",
@@ -97,7 +98,7 @@ class Commonmeta(TypedDict):
     "periodical": "Journal",
     "personal_communication": "PersonalCommunication",
     "post": "Post",
-    "post-weblog": "Article",
+    "post-weblog": "BlogPost",
     "regulation": "LegalDocument",
     "report": "Report",
     "review": "Review",
@@ -113,7 +114,7 @@ class Commonmeta(TypedDict):
 
 CM_TO_CSL_TRANSLATIONS = {
     "Article": "article",
-    "JournalArticle": "article-journal",
+    "BlogPost": "post-weblog",
     "Book": "book",
     "BookChapter": "chapter",
     "Collection": "collection",
@@ -123,6 +124,7 @@ class Commonmeta(TypedDict):
     "Event": "event",
     "Figure": "figure",
     "Image": "graphic",
+    "JournalArticle": "article-journal",
     "LegalDocument": "legal_case",
     "Manuscript": "manuscript",
     "Map": "map",
@@ -176,6 +178,7 @@ class Commonmeta(TypedDict):
 
 CM_TO_CR_TRANSLATIONS = {
     "Article": "PostedContent",
+    "BlogPost": "PostedContent",
     "BookChapter": "BookChapter",
     "BookSeries": "BookSeries",
     "Book": "Book",
@@ -200,7 +203,7 @@ class Commonmeta(TypedDict):
 # source: https://github.com/datacite/schema/blob/master/source/meta/kernel-4/include/datacite-resourceType-v4.xsd
 DC_TO_CM_TRANSLATIONS = {
     "Audiovisual": "Audiovisual",
-    "BlogPosting": "Article",
+    "BlogPosting": "BlogPost",
     "Book": "Book",
     "BookChapter": "BookChapter",
     "Collection": "Collection",
@@ -269,6 +272,7 @@ class Commonmeta(TypedDict):
 
 CM_TO_INVENIORDM_TRANSLATIONS = {
     "Article": "publication-preprint",
+    "BlogPost": "publication-preprint",
     "Book": "book",
     "Dataset": "dataset",
     "Image": "image-other",
@@ -281,6 +285,7 @@ class Commonmeta(TypedDict):
 CM_TO_DC_TRANSLATIONS = {
     "Article": "Preprint",
     "Audiovisual": "Audiovisual",
+    "BlogPost": "Preprint",
     "Book": "Book",
     "BookChapter": "BookChapter",
     "Collection": "Collection",
@@ -317,7 +322,7 @@ class Commonmeta(TypedDict):
     "ANCIENT": "Text",
     "ART": "Text",
     "BILL": "Text",
-    "BLOG": "Text",
+    "BLOG": "BlogPost",
     "BOOK": "Book",
     "CASE": "Text",
     "CHAP": "BookChapter",
@@ -370,6 +375,7 @@ class Commonmeta(TypedDict):
 CM_TO_RIS_TRANSLATIONS = {
     "Article": "JOUR",
     "Audiovisual": "VIDEO",
+    "BlogPost": "BLOG",
     "Book": "BOOK",
     "BookChapter": "CHAP",
     "Collection": "CTLG",
@@ -400,7 +406,7 @@ class Commonmeta(TypedDict):
 
 SO_TO_CM_TRANSLATIONS = {
     "Article": "Article",
-    "BlogPosting": "Article",
+    "BlogPosting": "BlogPost",
     "Book": "Book",
     "BookChapter": "BookChapter",
     "CreativeWork": "Other",
@@ -439,6 +445,7 @@ class Commonmeta(TypedDict):
 CM_TO_SO_TRANSLATIONS = {
     "Article": "Article",
     "Audiovisual": "CreativeWork",
+    "BlogPost": "BlogPosting",
     "Book": "Book",
     "BookChapter": "BookChapter",
     "Collection": "CreativeWork",

diff --git a/commonmeta/crossref_utils.py b/commonmeta/crossref_utils.py
@@ -36,13 +36,13 @@ def generate_crossref_xml(metadata: Commonmeta) -> Optional[str]:
 
 def insert_crossref_work(metadata, xml):
     """Insert crossref work"""
-    if metadata.type not in ["JournalArticle", "Article"]:
+    if metadata.type not in ["JournalArticle", "Article", "BlogPost"]:
         return xml
     if doi_from_url(metadata.id) is None or metadata.url is None:
         return xml
     if metadata.type == "JournalArticle":
         xml = insert_journal(metadata, xml)
-    elif metadata.type == "Article":
+    elif metadata.type in ["Article", "BlogPost"]:
         xml = insert_posted_content(metadata, xml)
 
 

diff --git a/commonmeta/readers/crossref_reader.py b/commonmeta/readers/crossref_reader.py
@@ -83,7 +83,8 @@ def editor_type(item):
     url = normalize_url(py_.get(meta, "resource.primary.URL"))
     titles = get_titles(meta)
     publisher = compact({"name": meta.get("publisher", None)})
-
+    if _type == "Article" and py_.get(publisher, "name") == "Front Matter":
+        _type = "BlogPost"
     date = compact(
         {
             "published": py_.get(meta, "issued.date-time")

diff --git a/commonmeta/readers/crossref_xml_reader.py b/commonmeta/readers/crossref_xml_reader.py
@@ -158,6 +158,9 @@ def read_crossref_xml(data: dict, **kwargs) -> Commonmeta:
         or py_.get(bibmeta, "doi_data.doi")
     )
     _type = CR_TO_CM_TRANSLATIONS.get(resource_type, "Other")
+    if _type == "Article" and py_.get(publisher, "name") == "Front Matter":
+        _type = "BlogPost"
+
     url = parse_attributes(py_.get(bibmeta, "doi_data.resource"))
     url = normalize_url(url)
     titles = crossref_titles(bibmeta)

diff --git a/commonmeta/readers/inveniordm_reader.py b/commonmeta/readers/inveniordm_reader.py
@@ -57,6 +57,9 @@ def read_inveniordm(data: dict, **kwargs) -> Commonmeta:
     publisher = meta.get("publisher", None) or py_.get(meta, "metadata.publisher")
     if publisher:
         publisher = {"name": publisher}
+    if _type == "Article" and py_.get(publisher, "name") == "Front Matter":
+        _type = "BlogPost"
+
     title = py_.get(meta, "metadata.title")
     titles = [{"title": sanitize(title)}] if title else None
     additional_titles = py_.get(meta, "metadata.additional_titles")

diff --git a/commonmeta/readers/json_feed_reader.py b/commonmeta/readers/json_feed_reader.py
@@ -54,7 +54,7 @@ def read_json_feed_item(data: Optional[dict], **kwargs) -> Commonmeta:
     ):
         url = normalize_url(meta.get("archive_url", None))
     _id = normalize_doi(read_options.get("doi", None) or meta.get("doi", None)) or url
-    _type = "Article"
+    _type = "BlogPost"
 
     # optionally generate a DOI if missing but a DOI prefix is provided
     prefix = read_options.get("prefix", None) or py_.get(meta, "blog.prefix", None)

diff --git a/commonmeta/readers/schema_org_reader.py b/commonmeta/readers/schema_org_reader.py
@@ -216,7 +216,7 @@ def read_schema_org(data: Optional[dict], **kwargs) -> Commonmeta:
                 "lastPage": meta.get("pageEnd", None),
             }
         )
-    elif _type == "Article":
+    elif _type in ["Article", "BlogPost"]:
         issn = py_.get(meta, "isPartOf.issn")
         container_url = py_.get(meta, "publisher.url")
         container = compact(

diff --git a/commonmeta/resources/commonmeta_v0.15.json b/commonmeta/resources/commonmeta_v0.15.json
@@ -533,6 +533,7 @@
       "enum": [
         "Article",
         "Audiovisual",
+        "BlogPost",
         "BookChapter",
         "BookPart",
         "BookSection",

diff --git a/commonmeta/writers/datacite_writer.py b/commonmeta/writers/datacite_writer.py
@@ -47,6 +47,8 @@ def write_datacite(metadata: Commonmeta) -> Optional[Union[str, dict]]:
 
     resource__typegeneral = CM_TO_DC_TRANSLATIONS.get(metadata.type, "Other")
     resource_type = CM_TO_CR_TRANSLATIONS.get(metadata.type, "Other")
+    if metadata.type == "BlogPost":
+        resource_type = "BlogPost"
     if resource__typegeneral == resource_type or resource__typegeneral in [
         "Dataset",
         "JournalArticle",

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "commonmeta-py"
-version = "0.73"
+version = "0.74"
 description = "Library for conversions to/from the Commonmeta scholarly metadata format"
 authors = [{ name = "Martin Fenner", email = "[email protected]" }]
 requires-python = ">=3.9,<4.0"

diff --git a/tests/test-bibtex_writer.py b/tests/test-bibtex_writer.py
@@ -43,7 +43,7 @@ def test_doi_for_blog_post():
     "DOi for blog post"
     subject = Metadata("10.53731/avg2ykg-gdxppcd")
     assert subject.id == "https://doi.org/10.53731/avg2ykg-gdxppcd"
-    assert subject.type == "Article"
+    assert subject.type == "BlogPost"
 
     bibtex = subject.write(to="bibtex")
 
@@ -72,7 +72,7 @@ def test_blog_post():
     string = "https://upstream.force11.org/welcome-to-upstream/"
     subject = Metadata(string)
     assert subject.id == "https://doi.org/10.54900/rckn8ey-1fm76va-qsrnf"
-    assert subject.type == "Article"
+    assert subject.type == "BlogPost"
     bibtex = subject.write(to="bibtex")
 
     assert (
@@ -199,7 +199,7 @@ def test_book_chapter():
     doi = {10.1007/978-3-662-46370-3_13},
     isbn = {9783662463703},
     language = {en},
-    month = jan,
+    month = feb,
     pages = {155--158},
     publisher = {Springer Berlin Heidelberg},
     title = {Clinical Symptoms and Physical Examinations},
@@ -226,7 +226,7 @@ def test_conference_proceedings():
     author = {Sinop, Ali Kemal and Grady, Leo},
     booktitle = {2007 IEEE 11th International Conference on Computer Vision},
     doi = {10.1109/iccv.2007.4408927},
-    month = jan,
+    month = feb,
     pages = {1--8},
     publisher = {IEEE},
     title = {A Seeded Image Segmentation Framework Unifying Graph Cuts And Random Walker Which Yields A New Algorithm},
@@ -444,7 +444,7 @@ def test_kbase_gulf_of_mexico():
     copyright = {https://creativecommons.org/licenses/by/4.0/},
     doi = {10.25982/86723.65/1778009},
     language = {en},
-    month = jan,
+    month = feb,
     publisher = {KBase},
     title = {Gulf of Mexico blue hole harbors high levels of novel microbial lineages: A load of cool stuff from the blue hole in the Gulf of Mexico},
     urldate = {2021},

diff --git a/tests/test-commonmeta_writer.py b/tests/test-commonmeta_writer.py
@@ -146,7 +146,7 @@ def test_write_commonmeta_list_json_feed():
     assert len(commonmeta_list["items"]) == 15
     commonmeta = commonmeta_list["items"][0]
     assert commonmeta["id"] == "https://doi.org/10.59350/26ft6-dmv65"
-    assert commonmeta["type"] == "Article"
+    assert commonmeta["type"] == "BlogPost"
     assert commonmeta["titles"] == [
         {
             "title": "Das BUA Open Science Dashboard Projekt: die Entwicklung disziplinspezifischer Open-Science-Indikatoren"
@@ -163,7 +163,7 @@ def test_write_commonmeta_missing_doi():
     commonmeta = json.loads(subject.write())
     assert re.match(r"\A(https://doi\.org/10\.59350/.+)\Z", commonmeta["id"])
     assert commonmeta["url"] == "https://www.ideasurg.pub/residency-visual-abstract"
-    assert commonmeta["type"] == "Article"
+    assert commonmeta["type"] == "BlogPost"
 
 
 def test_write_commonmeta_missing_doi_no_prefix():
@@ -177,7 +177,7 @@ def test_write_commonmeta_missing_doi_no_prefix():
     commonmeta = json.loads(subject.write())
     assert commonmeta["id"] == "https://www.ideasurg.pub/residency-visual-abstract"
     assert commonmeta["url"] == "https://www.ideasurg.pub/residency-visual-abstract"
-    assert commonmeta["type"] == "Article"
+    assert commonmeta["type"] == "BlogPost"
 
 
 def test_write_commonmeta_missing_doi_prefix():
@@ -191,4 +191,4 @@ def test_write_commonmeta_missing_doi_prefix():
     commonmeta = json.loads(subject.write())
     assert re.match(r"\A(https://doi\.org/10\.5555/.+)\Z", commonmeta["id"])
     assert commonmeta["url"] == "https://www.ideasurg.pub/residency-visual-abstract"
-    assert commonmeta["type"] == "Article"
+    assert commonmeta["type"] == "BlogPost"
diff --git a/tests/test-crossref_reader.py b/tests/test-crossref_reader.py
@@ -437,7 +437,7 @@ def test_blog_post():
     subject = Metadata(string)
     assert subject.is_valid
     assert subject.id == "https://doi.org/10.53731/ybhah-9jy85"
-    assert subject.type == "Article"
+    assert subject.type == "BlogPost"
     assert (
         subject.url
         == "https://blog.front-matter.io/posts/the-rise-of-the-science-newsletter"

diff --git a/tests/test-crossref_xml_writer.py b/tests/test-crossref_xml_writer.py
@@ -322,7 +322,7 @@ def test_json_feed_item_upstream_blog():
     subject = Metadata(string)
     assert subject.is_valid
     assert subject.id == "https://doi.org/10.54900/n6dnt-xpq48"
-    assert subject.type == "Article"
+    assert subject.type == "BlogPost"
     crossref_xml = parse_xml(subject.write(to="crossref_xml"), dialect="crossref")
     crossref_xml = py_.get(crossref_xml, "doi_batch.body.posted_content", {})
     assert len(py_.get(crossref_xml, "contributors.person_name")) == 1
@@ -496,7 +496,7 @@ def test_ghost_with_affiliations():
     subject = Metadata(string)
     assert subject.is_valid
     assert subject.id == "https://doi.org/10.53731/r796hz1-97aq74v-ag4f3"
-    assert subject.type == "Article"
+    assert subject.type == "BlogPost"
     assert len(subject.contributors) == 1
     assert subject.contributors[0] == {
         "type": "Person",
@@ -546,11 +546,11 @@ def test_json_feed_item_with_organizational_author():
     ]
     crossref_xml = parse_xml(subject.write(to="crossref_xml"), dialect="crossref")
     crossref_xml = py_.get(crossref_xml, "doi_batch.body.posted_content", {})
-    assert py_.get(crossref_xml, "contributors.organization") is None
-    assert py_.get(crossref_xml, "titles.0.title") is None
-    assert py_.get(crossref_xml, "doi_data.collection.item") is None
-    assert py_.get(crossref_xml, "doi_data.collection.item.0.resource") is None
-    assert crossref_xml.get("group_title") is None
+    assert py_.get(crossref_xml, "contributors.organization") ==  [{'#text': 'Liberate Science', 'contributor_role': 'author', 'sequence': 'first'}]
+    assert py_.get(crossref_xml, "titles.0.title") == "KU Leuven supports ResearchEquals"
+    assert len(py_.get(crossref_xml, "doi_data.collection.item")) == 5
+    assert py_.get(crossref_xml, "doi_data.collection.item.0.resource") == {'#text': 'https://libscie.org/ku-leuven-supports-researchequals', 'mime_type': 'text/html'}
+    assert crossref_xml.get("group_title") == 'Social science'
 
 
 @pytest.mark.vcr

diff --git a/tests/test-csl_reader.py b/tests/test-csl_reader.py
@@ -10,7 +10,7 @@ def test_blog_posting():
     subject = Metadata(string)
     assert subject.is_valid
     assert subject.id == "https://doi.org/10.5438/4k3m-nyvg"
-    assert subject.type == "Article"
+    assert subject.type == "BlogPost"
     assert subject.url == "https://blog.datacite.org/eating-your-own-dog-food"
     assert subject.contributors == [
         {
@@ -35,7 +35,7 @@ def test_no_categories():
     subject = Metadata(string)
     assert subject.is_valid
     assert subject.id == "https://doi.org/10.5072/4k3m-nyvg"
-    assert subject.type == "Article"
+    assert subject.type == "BlogPost"
     assert subject.url == "https://blog.datacite.org/eating-your-own-dog-food"
     assert subject.contributors == [
         {
@@ -57,7 +57,7 @@ def test_no_author():
     subject = Metadata(string)
     assert subject.is_valid
     assert subject.id == "https://doi.org/10.5438/4k3m-nyvg"
-    assert subject.type == "Article"
+    assert subject.type == "BlogPost"
     assert subject.url == "https://blog.datacite.org/eating-your-own-dog-food"
     assert subject.contributors is None
     assert subject.titles == [{"title": "Eating your own Dog Food"}]

diff --git a/tests/test-datacite_reader.py b/tests/test-datacite_reader.py
@@ -600,7 +600,7 @@ def test_datacite_json():
     assert subject.is_valid
     assert subject.id == "https://doi.org/10.5438/4k3m-nyvg"
     assert subject.url == "https://datacite.org/blog/eating-your-own-dog-food"
-    assert subject.type == "Article"
+    assert subject.type == "BlogPost"
     assert subject.titles[0] == {"title": "Eating your own Dog Food"}
     assert len(subject.contributors) == 1
     assert subject.contributors[0] == {