From 6bc0cb8b43bc41b403f0ae7c60c53fc982ad165b Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 01:08:34 +0000 Subject: [PATCH 01/15] feat: database support --- .gitignore | 4 +- .yamlfmt | 3 +- dlt_source_notion/__init__.py | 128 ++++++- dlt_source_notion/client.py | 6 +- dlt_source_notion/model/__init__.py | 0 dlt_source_notion/model/notion_2022_06_28.py | 347 +++++++++++++++++++ dlt_source_notion/type_adapters.py | 7 + notion_pipeline.py | 2 +- pyproject.toml | 2 + uv.lock | 79 +++++ 10 files changed, 557 insertions(+), 21 deletions(-) create mode 100644 dlt_source_notion/model/__init__.py create mode 100644 dlt_source_notion/model/notion_2022_06_28.py create mode 100644 dlt_source_notion/type_adapters.py diff --git a/.gitignore b/.gitignore index ade8080..80e8d37 100644 --- a/.gitignore +++ b/.gitignore @@ -178,4 +178,6 @@ secrets.toml .DS_Store -.python-version \ No newline at end of file +.python-version + +/*.json \ No newline at end of file diff --git a/.yamlfmt b/.yamlfmt index fe04c78..20eff81 100644 --- a/.yamlfmt +++ b/.yamlfmt @@ -2,5 +2,4 @@ gitignore_excludes: true eof_newline: true exclude: - .yamllint - - .pre-commit-config.yaml - - dlt_source_notion/model/spec \ No newline at end of file + - .pre-commit-config.yaml \ No newline at end of file diff --git a/dlt_source_notion/__init__.py b/dlt_source_notion/__init__.py index 8452cca..4b658c3 100644 --- a/dlt_source_notion/__init__.py +++ b/dlt_source_notion/__init__.py @@ -1,12 +1,38 @@ """A source loading entities and lists from notion (notion.com)""" from enum import StrEnum -from typing import Dict, Iterable, List, Sequence +import json +from typing import Any, Iterable, Sequence import dlt + +from dlt.common import json +from dlt.common.json import JsonSerializable from dlt.sources import DltResource, TDataItem -from dlt_source_notion.client import get_notion_client + from notion_client.helpers import iterate_paginated_api +from pydantic import AnyUrl, BaseModel + +from .client import get_notion_client +from .model.notion_2022_06_28 import Database, Page, User +from .type_adapters import user_adapter, object_adapter + + +def anyurl_encoder(obj: Any) -> JsonSerializable: + if isinstance(obj, AnyUrl): + return obj.unicode_string() + raise TypeError(f"Object of type {type(obj)} is not JSON serializable") + + +json.set_custom_encoder(anyurl_encoder) + + +def pydantic_model_dump(model: BaseModel, **kwargs): + """ + Dumps a Pydantic model to a dictionary, using the model's field names as keys and NOT observing the field aliases, + which is important for DLT to correctly map the data to the destination. + """ + return model.model_dump(by_alias=True, **kwargs) class Table(StrEnum): @@ -14,8 +40,8 @@ class Table(StrEnum): BOTS = "bots" -def use_id(entity: Dict, **kwargs) -> Dict: - return filter_dict(entity, **kwargs) | {"_dlt_id": __get_id(entity)} +def use_id(entity: User, **kwargs) -> dict: + return pydantic_model_dump(entity, **kwargs) | {"_dlt_id": __get_id(entity)} def __get_id(obj): @@ -33,22 +59,20 @@ def list_users() -> Iterable[TDataItem]: notion = get_notion_client() - yield from iterate_paginated_api(notion.users.list) - - -def filter_dict(d: Dict, exclude_keys: List[str]) -> Dict: - return {k: v for k, v in d.items() if k not in exclude_keys} + for user in iterate_paginated_api(notion.users.list): + yield user_adapter.validate_python(user) @dlt.transformer( parallelized=True, + name="users", ) -def split_user(user: Dict): +def split_user(user: User): - match user["type"]: + match user.type: case "bot": yield dlt.mark.with_hints( - item=use_id(user, exclude_keys=["type", "object"]), + item=use_id(user, exclude=["type", "object"]), hints=dlt.mark.make_hints( table_name=Table.BOTS.value, primary_key="id", @@ -59,7 +83,7 @@ def split_user(user: Dict): ) case "person": yield dlt.mark.with_hints( - item=use_id(user, exclude_keys=["bot", "type", "object"]), + item=use_id(user, exclude=["bot", "type", "object"]), hints=dlt.mark.make_hints( table_name=Table.PERSONS.value, primary_key="id", @@ -70,6 +94,75 @@ def split_user(user: Dict): ) +@dlt.resource( + selected=True, + parallelized=True, + primary_key="id", + max_table_nesting=1, +) +def database_resource( + database_id: str, +) -> Iterable[TDataItem]: + + notion = get_notion_client() + + db_raw = notion.databases.retrieve(database_id) + db: Database = object_adapter.validate_python(db_raw) + assert isinstance(db, Database) + + selected_properties = [p.name for p in db.properties.values() if p.name is not None] + + for page_raw in iterate_paginated_api( + notion.databases.query, database_id=database_id + ): + page: Page = object_adapter.validate_python(page_raw) + assert isinstance(page, Page) + + row = {} + for selected_property in selected_properties: + prop = page.properties[selected_property] + + match prop.type: + case "title": + row[selected_property] = " ".join( + [t.text.content for t in prop.title] + ) + case "rich_text": + row[selected_property] = " ".join( + [t.text.content for t in prop.rich_text] + ) + case "number": + row[selected_property] = prop.number + case "select": + if prop.select is None: + row[selected_property] = None + continue + row[selected_property] = prop.select.id + case "multi_select": + row[selected_property] = [s.id for s in prop.multi_select] + case "date": + if prop.date is None: + row[selected_property] = None + continue + if prop.date.end: + # we have a range + row[selected_property] = prop.date + else: + row[selected_property] = prop.date.start + case "people": + row[selected_property] = [p.id for p in prop.people] + case "last_edited_by": + row[selected_property] = prop.last_edited_by.id + case "last_edited_time": + row[selected_property] = prop.last_edited_time + case "relation": + row[selected_property] = [r.id for r in prop.relation] + case _: + # See https://developers.notion.com/reference/page-property-values + raise ValueError(f"Unsupported property type: {prop.type}") + yield pydantic_model_dump(page, exclude=["properties", "object"]) | row + + @dlt.source(name="notion") def source( limit: int = -1, @@ -78,7 +171,12 @@ def source( if limit != -1: users.add_limit(limit) - return users | split_user + db_rs = database_resource(database_id="...") + + return ( + users | split_user, + db_rs, + ) -__all__ = ["source"] +__all__ = ["source", "database_resource"] diff --git a/dlt_source_notion/client.py b/dlt_source_notion/client.py index dfc4c9e..55da02a 100644 --- a/dlt_source_notion/client.py +++ b/dlt_source_notion/client.py @@ -2,5 +2,7 @@ from notion_client import Client -def get_notion_client(): - return Client(auth=dlt.secrets["notion_token"]) +def get_notion_client() -> Client: + if not hasattr(get_notion_client, "client"): + get_notion_client.client = Client(auth=dlt.secrets["notion_token"]) + return get_notion_client.client diff --git a/dlt_source_notion/model/__init__.py b/dlt_source_notion/model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dlt_source_notion/model/notion_2022_06_28.py b/dlt_source_notion/model/notion_2022_06_28.py new file mode 100644 index 0000000..901d6db --- /dev/null +++ b/dlt_source_notion/model/notion_2022_06_28.py @@ -0,0 +1,347 @@ +from __future__ import annotations +from typing import List, Optional, Literal, Dict, Any, Annotated +from uuid import UUID +from pydantic import AnyUrl, BaseModel, ConfigDict, Field, constr +from pydantic_extra_types.pendulum_dt import DateTime, Date + + +class Empty(BaseModel): + ... + model_config = ConfigDict(extra="forbid") + + +# Define a custom type for Notion property IDs. +PropertyID = constr(pattern=r"^[A-Za-z0-9%~\\]+$") + + +class UserReference(BaseModel): + object: Literal["user"] + id: UUID + + +class UserBase(UserReference): + name: str + avatar_url: Optional[AnyUrl] = None + + +class PersonProperties(BaseModel): + email: str + + +# Person model +class Person(UserBase): + type: Literal["person"] + person: PersonProperties + + +class WorkspaceBotOwner(BaseModel): + type: Literal["workspace"] = None + workspace: Optional[bool] = None + + +class UserBotOwner(BaseModel): + type: Literal["user"] = None + user: User + + +# The discriminated union using the "type" field as the discriminator +BotOwner = Annotated[ + UserBotOwner | WorkspaceBotOwner, + Field(discriminator="type"), +] + + +class BotProperties(BaseModel): + owner: BotOwner + workspace_name: Optional[str] = None + + +# Bot model +class Bot(UserBase): + type: Literal["bot"] + bot: BotProperties | Empty + + +# The discriminated union for User using the "type" field as the discriminator +User = Annotated[Person | Bot, Field(discriminator="type")] + + +class Link(BaseModel): + url: AnyUrl + + +# Models for the title field +class TextContent(BaseModel): + content: str + link: Optional[Link] = None + + +class TextItemAnnotations(BaseModel): + bold: bool + italic: bool + strikethrough: bool + underline: bool + code: bool + color: str + + +class TextItem(BaseModel): + type: Literal["text"] + text: TextContent + annotations: TextItemAnnotations + plain_text: str + href: Optional[AnyUrl] = None + + +class ReferenceBase(BaseModel): + type: str # This field is used as the discriminator + + +class PageReference(ReferenceBase): + type: Literal["page_id"] + page_id: UUID + + +class DatabaseReference(ReferenceBase): + type: Literal["database_id"] + database_id: UUID + + +# The discriminated union for Property using the "type" field as the discriminator +Reference = Annotated[PageReference | DatabaseReference, Field(discriminator="type")] + + +# Base class for Property (used as discriminator) +class PropertyBase(BaseModel): + id: PropertyID # type: ignore + name: Optional[str] = None # This is only available in the database item itself + type: str # This field is used as the discriminator + + +# People property model +class EmptyPeopleProperty(PropertyBase): + type: Literal["people"] + people: Empty + + +class PeopleProperty(EmptyPeopleProperty): + people: List[User] + + +# Rich text property model +class EmptyRichTextProperty(PropertyBase): + type: Literal["rich_text"] + rich_text: Empty + + +class RichTextProperty(EmptyRichTextProperty): + rich_text: List[TextItem] + + +NamedColor = Literal[ + "default", + "gray", + "brown", + "orange", + "yellow", + "green", + "blue", + "purple", + "pink", + "red", +] + + +# Multi-select property models +class MultiSelectOption(BaseModel): + id: PropertyID | UUID # type: ignore + name: str + color: NamedColor + description: Optional[str] = None + + +class MultiSelectData(BaseModel): + options: List[MultiSelectOption] + + +class MultiSelectPropertyBase(PropertyBase): + type: Literal["multi_select"] + + +class EmptyMultiSelectProperty(MultiSelectPropertyBase): + multi_select: MultiSelectData + + +class MultiSelectProperty(MultiSelectPropertyBase): + multi_select: List[MultiSelectOption] + + +# Last edited time property model +class EmptyLastEditedTimeProperty(PropertyBase): + type: Literal["last_edited_time"] + last_edited_time: Empty + + +class LastEditedTimeProperty(EmptyLastEditedTimeProperty): + last_edited_time: DateTime + + +class DateData(BaseModel): + """ + https://developers.notion.com/reference/page-property-values#date + """ + + start: DateTime | Date + end: Optional[DateTime] = None + time_zone: Optional[Any] = None # TODO: Update type when structure is known + + +# Date property model +class EmptyDateProperty(PropertyBase): + type: Literal["date"] + date: Empty + + +class DateProperty(EmptyDateProperty): + date: Optional[DateData] = None + + +# Number property models +class EmptyNumberData(BaseModel): + format: Literal["number"] + + +class EmptyNumberProperty(PropertyBase): + type: Literal["number"] + number: EmptyNumberData + + +class NumberProperty(EmptyNumberProperty): + number: Optional[int | float] = None + + +# Select property models +class SelectOptionBase(BaseModel): + id: UUID + name: str + color: NamedColor + + +class EmptySelectOption(SelectOptionBase): + description: Optional[str] = None + + +class SelectOption(SelectOptionBase): + pass + + +class SelectData(BaseModel): + options: List[EmptySelectOption] + + +class SelectPropertyBase(PropertyBase): + type: Literal["select"] + + +class EmptySelectProperty(SelectPropertyBase): + select: SelectData + + +class SelectProperty(SelectPropertyBase): + select: Optional[SelectOption] = None + + +# Last edited by property model +class EmptyLastEditedByProperty(PropertyBase): + type: Literal["last_edited_by"] + last_edited_by: Empty + + +class LastEditedByProperty(EmptyLastEditedByProperty): + last_edited_by: User + + +# Title property model + + +class EmptyTitleProperty(PropertyBase): + type: Literal["title"] + title: Empty + + +class TitleProperty(EmptyTitleProperty): + title: List[TextItem] + + +class IdReference(BaseModel): + id: UUID + + +class RelationProperty(PropertyBase): + type: Literal["relation"] + relation: List[IdReference] + has_more: bool + + +# The discriminated union for Property using the "type" field as the discriminator +Property = Annotated[ + PeopleProperty + | RichTextProperty + | MultiSelectProperty + | LastEditedTimeProperty + | DateProperty + | NumberProperty + | SelectProperty + | LastEditedByProperty + | TitleProperty + | RelationProperty, + Field(discriminator="type"), +] + +# The discriminated union for GenericProperty using the "type" field as the discriminator +GenericProperty = Annotated[ + EmptyPeopleProperty + | EmptyRichTextProperty + | EmptyMultiSelectProperty + | EmptyLastEditedTimeProperty + | EmptyDateProperty + | EmptyNumberProperty + | EmptySelectProperty + | EmptyLastEditedByProperty + | EmptyTitleProperty, + Field(discriminator="type"), +] + + +class ObjectBase(BaseModel): + object: str # This field is used as the discriminator + id: UUID + cover: Optional[Any] = None # TODO: Update type when structure is known + icon: Optional[Any] = None # TODO: Update type when structure is known + created_time: DateTime + created_by: UserReference + last_edited_by: UserReference + last_edited_time: DateTime + parent: Reference + archived: bool + in_trash: bool + url: AnyUrl + public_url: Optional[AnyUrl] = None + + +class Page(ObjectBase): + object: Literal["page"] + properties: Dict[str, Property] + + +# Main model for the database object +class Database(ObjectBase): + object: Literal["database"] + title: List[TextItem] + description: List[Any] # TODO: Update type when structure is known + is_inline: bool + properties: Dict[str, GenericProperty] + request_id: UUID + + +NotionObject = Annotated[Page | Database, Field(discriminator="object")] diff --git a/dlt_source_notion/type_adapters.py b/dlt_source_notion/type_adapters.py new file mode 100644 index 0000000..c8e8c24 --- /dev/null +++ b/dlt_source_notion/type_adapters.py @@ -0,0 +1,7 @@ +from pydantic import TypeAdapter + +from .model.notion_2022_06_28 import NotionObject, User + + +user_adapter = TypeAdapter(User) +object_adapter = TypeAdapter(NotionObject) diff --git a/notion_pipeline.py b/notion_pipeline.py index d8a4e0f..94097ac 100644 --- a/notion_pipeline.py +++ b/notion_pipeline.py @@ -9,7 +9,7 @@ def load_notion_data() -> None: pipeline_name="notion_pipeline", destination="duckdb", dev_mode=DEV_MODE ) data = source( - limit=-1 if not DEV_MODE else 1, + # limit=-1 if not DEV_MODE else 1, ) info = pipeline.run( data, diff --git a/pyproject.toml b/pyproject.toml index 8c3e5ab..80cabd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,8 @@ requires-python = ">=3.12" dependencies = [ "dlt>=1.8.1", "notion-client>=2.3.0", + "pydantic-extra-types>=2.10.3", + "pydantic>=2.10.6", ] dynamic = ["authors", "classifiers", "version", "description"] diff --git a/uv.lock b/uv.lock index f05bfd6..cc9dea5 100644 --- a/uv.lock +++ b/uv.lock @@ -21,6 +21,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/f3/0b6ced594e51cc95d8c1fc1640d3623770d01e4969d29c0bd09945fafefa/altair-5.5.0-py3-none-any.whl", hash = "sha256:91a310b926508d560fe0148d02a194f38b824122641ef528113d029fcd129f8c", size = 731200 }, ] +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + [[package]] name = "anyio" version = "4.9.0" @@ -244,6 +253,8 @@ source = { virtual = "." } dependencies = [ { name = "dlt" }, { name = "notion-client" }, + { name = "pydantic" }, + { name = "pydantic-extra-types" }, ] [package.optional-dependencies] @@ -263,6 +274,8 @@ requires-dist = [ { name = "dlt", specifier = ">=1.8.1" }, { name = "dlt", extras = ["duckdb"], marker = "extra == 'show'", specifier = ">=1.8.1" }, { name = "notion-client", specifier = ">=2.3.0" }, + { name = "pydantic", specifier = ">=2.10.6" }, + { name = "pydantic-extra-types", specifier = ">=2.10.3" }, { name = "streamlit", marker = "extra == 'show'", specifier = ">=1.41.1" }, { name = "watchdog", marker = "extra == 'show'", specifier = ">=6.0.0" }, ] @@ -803,6 +816,72 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 }, ] +[[package]] +name = "pydantic" +version = "2.10.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b7/ae/d5220c5c52b158b1de7ca89fc5edb72f304a70a4c540c84c8844bf4008de/pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236", size = 761681 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/3c/8cc1cc84deffa6e25d2d0c688ebb80635dfdbf1dbea3e30c541c8cf4d860/pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584", size = 431696 }, +] + +[[package]] +name = "pydantic-core" +version = "2.27.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/01/f3e5ac5e7c25833db5eb555f7b7ab24cd6f8c322d3a3ad2d67a952dc0abc/pydantic_core-2.27.2.tar.gz", hash = "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39", size = 413443 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d6/74/51c8a5482ca447871c93e142d9d4a92ead74de6c8dc5e66733e22c9bba89/pydantic_core-2.27.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0", size = 1893127 }, + { url = "https://files.pythonhosted.org/packages/d3/f3/c97e80721735868313c58b89d2de85fa80fe8dfeeed84dc51598b92a135e/pydantic_core-2.27.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef", size = 1811340 }, + { url = "https://files.pythonhosted.org/packages/9e/91/840ec1375e686dbae1bd80a9e46c26a1e0083e1186abc610efa3d9a36180/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7", size = 1822900 }, + { url = "https://files.pythonhosted.org/packages/f6/31/4240bc96025035500c18adc149aa6ffdf1a0062a4b525c932065ceb4d868/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934", size = 1869177 }, + { url = "https://files.pythonhosted.org/packages/fa/20/02fbaadb7808be578317015c462655c317a77a7c8f0ef274bc016a784c54/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6", size = 2038046 }, + { url = "https://files.pythonhosted.org/packages/06/86/7f306b904e6c9eccf0668248b3f272090e49c275bc488a7b88b0823444a4/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c", size = 2685386 }, + { url = "https://files.pythonhosted.org/packages/8d/f0/49129b27c43396581a635d8710dae54a791b17dfc50c70164866bbf865e3/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2", size = 1997060 }, + { url = "https://files.pythonhosted.org/packages/0d/0f/943b4af7cd416c477fd40b187036c4f89b416a33d3cc0ab7b82708a667aa/pydantic_core-2.27.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4", size = 2004870 }, + { url = "https://files.pythonhosted.org/packages/35/40/aea70b5b1a63911c53a4c8117c0a828d6790483f858041f47bab0b779f44/pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3", size = 1999822 }, + { url = "https://files.pythonhosted.org/packages/f2/b3/807b94fd337d58effc5498fd1a7a4d9d59af4133e83e32ae39a96fddec9d/pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4", size = 2130364 }, + { url = "https://files.pythonhosted.org/packages/fc/df/791c827cd4ee6efd59248dca9369fb35e80a9484462c33c6649a8d02b565/pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57", size = 2158303 }, + { url = "https://files.pythonhosted.org/packages/9b/67/4e197c300976af185b7cef4c02203e175fb127e414125916bf1128b639a9/pydantic_core-2.27.2-cp312-cp312-win32.whl", hash = "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc", size = 1834064 }, + { url = "https://files.pythonhosted.org/packages/1f/ea/cd7209a889163b8dcca139fe32b9687dd05249161a3edda62860430457a5/pydantic_core-2.27.2-cp312-cp312-win_amd64.whl", hash = "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9", size = 1989046 }, + { url = "https://files.pythonhosted.org/packages/bc/49/c54baab2f4658c26ac633d798dab66b4c3a9bbf47cff5284e9c182f4137a/pydantic_core-2.27.2-cp312-cp312-win_arm64.whl", hash = "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b", size = 1885092 }, + { url = "https://files.pythonhosted.org/packages/41/b1/9bc383f48f8002f99104e3acff6cba1231b29ef76cfa45d1506a5cad1f84/pydantic_core-2.27.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b", size = 1892709 }, + { url = "https://files.pythonhosted.org/packages/10/6c/e62b8657b834f3eb2961b49ec8e301eb99946245e70bf42c8817350cbefc/pydantic_core-2.27.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154", size = 1811273 }, + { url = "https://files.pythonhosted.org/packages/ba/15/52cfe49c8c986e081b863b102d6b859d9defc63446b642ccbbb3742bf371/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9", size = 1823027 }, + { url = "https://files.pythonhosted.org/packages/b1/1c/b6f402cfc18ec0024120602bdbcebc7bdd5b856528c013bd4d13865ca473/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9", size = 1868888 }, + { url = "https://files.pythonhosted.org/packages/bd/7b/8cb75b66ac37bc2975a3b7de99f3c6f355fcc4d89820b61dffa8f1e81677/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1", size = 2037738 }, + { url = "https://files.pythonhosted.org/packages/c8/f1/786d8fe78970a06f61df22cba58e365ce304bf9b9f46cc71c8c424e0c334/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a", size = 2685138 }, + { url = "https://files.pythonhosted.org/packages/a6/74/d12b2cd841d8724dc8ffb13fc5cef86566a53ed358103150209ecd5d1999/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e", size = 1997025 }, + { url = "https://files.pythonhosted.org/packages/a0/6e/940bcd631bc4d9a06c9539b51f070b66e8f370ed0933f392db6ff350d873/pydantic_core-2.27.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4", size = 2004633 }, + { url = "https://files.pythonhosted.org/packages/50/cc/a46b34f1708d82498c227d5d80ce615b2dd502ddcfd8376fc14a36655af1/pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27", size = 1999404 }, + { url = "https://files.pythonhosted.org/packages/ca/2d/c365cfa930ed23bc58c41463bae347d1005537dc8db79e998af8ba28d35e/pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee", size = 2130130 }, + { url = "https://files.pythonhosted.org/packages/f4/d7/eb64d015c350b7cdb371145b54d96c919d4db516817f31cd1c650cae3b21/pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1", size = 2157946 }, + { url = "https://files.pythonhosted.org/packages/a4/99/bddde3ddde76c03b65dfd5a66ab436c4e58ffc42927d4ff1198ffbf96f5f/pydantic_core-2.27.2-cp313-cp313-win32.whl", hash = "sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130", size = 1834387 }, + { url = "https://files.pythonhosted.org/packages/71/47/82b5e846e01b26ac6f1893d3c5f9f3a2eb6ba79be26eef0b759b4fe72946/pydantic_core-2.27.2-cp313-cp313-win_amd64.whl", hash = "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee", size = 1990453 }, + { url = "https://files.pythonhosted.org/packages/51/b2/b2b50d5ecf21acf870190ae5d093602d95f66c9c31f9d5de6062eb329ad1/pydantic_core-2.27.2-cp313-cp313-win_arm64.whl", hash = "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b", size = 1885186 }, +] + +[[package]] +name = "pydantic-extra-types" +version = "2.10.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/53/fa/6b268a47839f8af46ffeb5bb6aee7bded44fbad54e6bf826c11f17aef91a/pydantic_extra_types-2.10.3.tar.gz", hash = "sha256:dcc0a7b90ac9ef1b58876c9b8fdede17fbdde15420de9d571a9fccde2ae175bb", size = 95128 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0a/f6f8e5f79d188e2f3fa9ecfccfa72538b685985dd5c7c2886c67af70e685/pydantic_extra_types-2.10.3-py3-none-any.whl", hash = "sha256:e8b372752b49019cd8249cc192c62a820d8019f5382a8789d0f887338a59c0f3", size = 37175 }, +] + [[package]] name = "pydeck" version = "0.9.1" From 21b1c9647a0441b128d0fbf613373344a82e7472 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 01:10:05 +0000 Subject: [PATCH 02/15] Update notion_pipeline.py --- notion_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notion_pipeline.py b/notion_pipeline.py index 94097ac..d8a4e0f 100644 --- a/notion_pipeline.py +++ b/notion_pipeline.py @@ -9,7 +9,7 @@ def load_notion_data() -> None: pipeline_name="notion_pipeline", destination="duckdb", dev_mode=DEV_MODE ) data = source( - # limit=-1 if not DEV_MODE else 1, + limit=-1 if not DEV_MODE else 1, ) info = pipeline.run( data, From b7b0bd78c7149f3b1d079693d7bda166318a03f9 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 11:07:10 +0000 Subject: [PATCH 03/15] wip(today's fortune): You dialed 5483. --- dlt_source_notion/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlt_source_notion/__init__.py b/dlt_source_notion/__init__.py index 4b658c3..33e8f47 100644 --- a/dlt_source_notion/__init__.py +++ b/dlt_source_notion/__init__.py @@ -40,7 +40,7 @@ class Table(StrEnum): BOTS = "bots" -def use_id(entity: User, **kwargs) -> dict: +def use_id(entity: User | Page | Database, **kwargs) -> dict: return pydantic_model_dump(entity, **kwargs) | {"_dlt_id": __get_id(entity)} @@ -160,7 +160,7 @@ def database_resource( case _: # See https://developers.notion.com/reference/page-property-values raise ValueError(f"Unsupported property type: {prop.type}") - yield pydantic_model_dump(page, exclude=["properties", "object"]) | row + yield use_id(page, exclude=["properties", "object"]) | row @dlt.source(name="notion") From 45cc99a98c850c84c99362d13a4064c64f9f305c Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 13:02:17 +0000 Subject: [PATCH 04/15] refactor: move to typed SDK --- dlt_source_notion/__init__.py | 225 +++++++----- dlt_source_notion/client.py | 6 +- dlt_source_notion/model/__init__.py | 0 dlt_source_notion/model/notion_2022_06_28.py | 347 ------------------- dlt_source_notion/type_adapters.py | 7 - pyproject.toml | 3 +- uv.lock | 112 ++++-- 7 files changed, 223 insertions(+), 477 deletions(-) delete mode 100644 dlt_source_notion/model/__init__.py delete mode 100644 dlt_source_notion/model/notion_2022_06_28.py delete mode 100644 dlt_source_notion/type_adapters.py diff --git a/dlt_source_notion/__init__.py b/dlt_source_notion/__init__.py index 33e8f47..8cc0382 100644 --- a/dlt_source_notion/__init__.py +++ b/dlt_source_notion/__init__.py @@ -2,20 +2,27 @@ from enum import StrEnum import json -from typing import Any, Iterable, Sequence +from typing import Any, Callable, Generator, Iterable, List, Sequence, TypeVar import dlt +from pydantic import TypeAdapter from dlt.common import json from dlt.common.json import JsonSerializable -from dlt.sources import DltResource, TDataItem +from dlt.sources import DltResource +from pydantic_api.notion.models import ( + UserObject, + StartCursor, + NotionPaginatedData, + Database, + Page, + PageProperty, +) -from notion_client.helpers import iterate_paginated_api +# from notion_client.helpers import iterate_paginated_api from pydantic import AnyUrl, BaseModel from .client import get_notion_client -from .model.notion_2022_06_28 import Database, Page, User -from .type_adapters import user_adapter, object_adapter def anyurl_encoder(obj: Any) -> JsonSerializable: @@ -40,7 +47,7 @@ class Table(StrEnum): BOTS = "bots" -def use_id(entity: User | Page | Database, **kwargs) -> dict: +def use_id(entity: UserObject, **kwargs) -> dict: return pydantic_model_dump(entity, **kwargs) | {"_dlt_id": __get_id(entity)} @@ -50,48 +57,70 @@ def __get_id(obj): return getattr(obj, "id", None) +R = TypeVar("R", bound=BaseModel) + + +def iterate_paginated_api( + function: Callable[..., NotionPaginatedData[R]], **kwargs: Any +) -> Generator[List[R], None, None]: + """Return an iterator over the results of any paginated Notion API.""" + next_cursor: StartCursor = kwargs.pop("start_cursor", None) + + while True: + response = function(**kwargs, start_cursor=next_cursor) + yield response.results + + next_cursor = response.next_cursor + if not response.has_more or not next_cursor: + return + + @dlt.resource( selected=True, parallelized=True, primary_key="id", ) -def list_users() -> Iterable[TDataItem]: - - notion = get_notion_client() +def list_users() -> Iterable[UserObject]: + client = get_notion_client() - for user in iterate_paginated_api(notion.users.list): - yield user_adapter.validate_python(user) + yield from iterate_paginated_api(client.users.list) @dlt.transformer( parallelized=True, name="users", ) -def split_user(user: User): - - match user.type: - case "bot": - yield dlt.mark.with_hints( - item=use_id(user, exclude=["type", "object"]), - hints=dlt.mark.make_hints( - table_name=Table.BOTS.value, - primary_key="id", - write_disposition="merge", - ), - # needs to be a variant due to https://github.com/dlt-hub/dlt/pull/2109 - create_table_variant=True, - ) - case "person": - yield dlt.mark.with_hints( - item=use_id(user, exclude=["bot", "type", "object"]), - hints=dlt.mark.make_hints( - table_name=Table.PERSONS.value, - primary_key="id", - write_disposition="merge", - ), - # needs to be a variant due to https://github.com/dlt-hub/dlt/pull/2109 - create_table_variant=True, - ) +def split_user(users: List[UserObject]): + """ + Split users into two tables: persons and bots. + """ + for user in users: + match user.type: + case "bot": + yield dlt.mark.with_hints( + item=use_id(user, exclude=["type", "object"]), + hints=dlt.mark.make_hints( + table_name=Table.BOTS.value, + primary_key="id", + write_disposition="merge", + ), + # needs to be a variant due to https://github.com/dlt-hub/dlt/pull/2109 + create_table_variant=True, + ) + case "person": + yield dlt.mark.with_hints( + item=use_id(user, exclude=["bot", "type", "object"]), + hints=dlt.mark.make_hints( + table_name=Table.PERSONS.value, + primary_key="id", + write_disposition="merge", + ), + # needs to be a variant due to https://github.com/dlt-hub/dlt/pull/2109 + create_table_variant=True, + ) + + +page_property_adapter = TypeAdapter(PageProperty) @dlt.resource( @@ -102,65 +131,71 @@ def split_user(user: User): ) def database_resource( database_id: str, -) -> Iterable[TDataItem]: - - notion = get_notion_client() - - db_raw = notion.databases.retrieve(database_id) - db: Database = object_adapter.validate_python(db_raw) - assert isinstance(db, Database) - - selected_properties = [p.name for p in db.properties.values() if p.name is not None] - - for page_raw in iterate_paginated_api( - notion.databases.query, database_id=database_id - ): - page: Page = object_adapter.validate_python(page_raw) - assert isinstance(page, Page) - - row = {} - for selected_property in selected_properties: - prop = page.properties[selected_property] - - match prop.type: - case "title": - row[selected_property] = " ".join( - [t.text.content for t in prop.title] - ) - case "rich_text": - row[selected_property] = " ".join( - [t.text.content for t in prop.rich_text] - ) - case "number": - row[selected_property] = prop.number - case "select": - if prop.select is None: - row[selected_property] = None - continue - row[selected_property] = prop.select.id - case "multi_select": - row[selected_property] = [s.id for s in prop.multi_select] - case "date": - if prop.date is None: - row[selected_property] = None - continue - if prop.date.end: - # we have a range - row[selected_property] = prop.date - else: - row[selected_property] = prop.date.start - case "people": - row[selected_property] = [p.id for p in prop.people] - case "last_edited_by": - row[selected_property] = prop.last_edited_by.id - case "last_edited_time": - row[selected_property] = prop.last_edited_time - case "relation": - row[selected_property] = [r.id for r in prop.relation] - case _: - # See https://developers.notion.com/reference/page-property-values - raise ValueError(f"Unsupported property type: {prop.type}") - yield use_id(page, exclude=["properties", "object"]) | row + property_filter: Callable[[str], bool] = lambda _: True, +) -> Iterable[Page]: + + client = get_notion_client() + + db: Database = client.databases.retrieve(database_id=database_id) + + all_properties = [ + p.name + for p in db.properties.values() + if p.name is not None and isinstance(p.name, str) + ] + selected_properties = list(filter(property_filter, all_properties)) + + for pages in iterate_paginated_api(client.databases.query, database_id=database_id): + for page in pages: + assert isinstance(page, Page) + + row = {} + for selected_property in selected_properties: + prop_raw = page.properties[selected_property] + # TODO: remove this cast, once https://github.com/stevieflyer/pydantic-api-models-notion/pull/6 lands + prop: PageProperty = page_property_adapter.validate_python(prop_raw) + + match prop.type: + case "title": + row[selected_property] = " ".join( + [t.text.content for t in prop.title] + ) + case "rich_text": + row[selected_property] = " ".join( + [t.text.content for t in prop.rich_text] + ) + case "number": + row[selected_property] = prop.number + case "select": + if prop.select is None: + row[selected_property] = None + continue + row[selected_property] = prop.select.id + case "multi_select": + row[selected_property] = [s.id for s in prop.multi_select] + case "date": + if prop.date is None: + row[selected_property] = None + continue + if prop.date.end: + # we have a range + row[selected_property] = prop.date + else: + row[selected_property] = prop.date.start + case "people": + row[selected_property] = [p.id for p in prop.people] + case "last_edited_by": + row[selected_property] = prop.last_edited_by.id + case "last_edited_time": + row[selected_property] = prop.last_edited_time + case "relation": + row[selected_property] = [r.id for r in prop.relation] + case _: + # See https://developers.notion.com/reference/page-property-values + raise ValueError( + f"Unsupported property type: {prop.type}; Please open a pull request." + ) + yield use_id(page, exclude=["properties", "object"]) | row @dlt.source(name="notion") diff --git a/dlt_source_notion/client.py b/dlt_source_notion/client.py index 55da02a..af76be0 100644 --- a/dlt_source_notion/client.py +++ b/dlt_source_notion/client.py @@ -1,8 +1,8 @@ import dlt -from notion_client import Client +from pydantic_api.notion.sdk import NotionClient -def get_notion_client() -> Client: +def get_notion_client() -> NotionClient: if not hasattr(get_notion_client, "client"): - get_notion_client.client = Client(auth=dlt.secrets["notion_token"]) + get_notion_client.client = NotionClient(auth=dlt.secrets["notion_token"]) return get_notion_client.client diff --git a/dlt_source_notion/model/__init__.py b/dlt_source_notion/model/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/dlt_source_notion/model/notion_2022_06_28.py b/dlt_source_notion/model/notion_2022_06_28.py deleted file mode 100644 index 901d6db..0000000 --- a/dlt_source_notion/model/notion_2022_06_28.py +++ /dev/null @@ -1,347 +0,0 @@ -from __future__ import annotations -from typing import List, Optional, Literal, Dict, Any, Annotated -from uuid import UUID -from pydantic import AnyUrl, BaseModel, ConfigDict, Field, constr -from pydantic_extra_types.pendulum_dt import DateTime, Date - - -class Empty(BaseModel): - ... - model_config = ConfigDict(extra="forbid") - - -# Define a custom type for Notion property IDs. -PropertyID = constr(pattern=r"^[A-Za-z0-9%~\\]+$") - - -class UserReference(BaseModel): - object: Literal["user"] - id: UUID - - -class UserBase(UserReference): - name: str - avatar_url: Optional[AnyUrl] = None - - -class PersonProperties(BaseModel): - email: str - - -# Person model -class Person(UserBase): - type: Literal["person"] - person: PersonProperties - - -class WorkspaceBotOwner(BaseModel): - type: Literal["workspace"] = None - workspace: Optional[bool] = None - - -class UserBotOwner(BaseModel): - type: Literal["user"] = None - user: User - - -# The discriminated union using the "type" field as the discriminator -BotOwner = Annotated[ - UserBotOwner | WorkspaceBotOwner, - Field(discriminator="type"), -] - - -class BotProperties(BaseModel): - owner: BotOwner - workspace_name: Optional[str] = None - - -# Bot model -class Bot(UserBase): - type: Literal["bot"] - bot: BotProperties | Empty - - -# The discriminated union for User using the "type" field as the discriminator -User = Annotated[Person | Bot, Field(discriminator="type")] - - -class Link(BaseModel): - url: AnyUrl - - -# Models for the title field -class TextContent(BaseModel): - content: str - link: Optional[Link] = None - - -class TextItemAnnotations(BaseModel): - bold: bool - italic: bool - strikethrough: bool - underline: bool - code: bool - color: str - - -class TextItem(BaseModel): - type: Literal["text"] - text: TextContent - annotations: TextItemAnnotations - plain_text: str - href: Optional[AnyUrl] = None - - -class ReferenceBase(BaseModel): - type: str # This field is used as the discriminator - - -class PageReference(ReferenceBase): - type: Literal["page_id"] - page_id: UUID - - -class DatabaseReference(ReferenceBase): - type: Literal["database_id"] - database_id: UUID - - -# The discriminated union for Property using the "type" field as the discriminator -Reference = Annotated[PageReference | DatabaseReference, Field(discriminator="type")] - - -# Base class for Property (used as discriminator) -class PropertyBase(BaseModel): - id: PropertyID # type: ignore - name: Optional[str] = None # This is only available in the database item itself - type: str # This field is used as the discriminator - - -# People property model -class EmptyPeopleProperty(PropertyBase): - type: Literal["people"] - people: Empty - - -class PeopleProperty(EmptyPeopleProperty): - people: List[User] - - -# Rich text property model -class EmptyRichTextProperty(PropertyBase): - type: Literal["rich_text"] - rich_text: Empty - - -class RichTextProperty(EmptyRichTextProperty): - rich_text: List[TextItem] - - -NamedColor = Literal[ - "default", - "gray", - "brown", - "orange", - "yellow", - "green", - "blue", - "purple", - "pink", - "red", -] - - -# Multi-select property models -class MultiSelectOption(BaseModel): - id: PropertyID | UUID # type: ignore - name: str - color: NamedColor - description: Optional[str] = None - - -class MultiSelectData(BaseModel): - options: List[MultiSelectOption] - - -class MultiSelectPropertyBase(PropertyBase): - type: Literal["multi_select"] - - -class EmptyMultiSelectProperty(MultiSelectPropertyBase): - multi_select: MultiSelectData - - -class MultiSelectProperty(MultiSelectPropertyBase): - multi_select: List[MultiSelectOption] - - -# Last edited time property model -class EmptyLastEditedTimeProperty(PropertyBase): - type: Literal["last_edited_time"] - last_edited_time: Empty - - -class LastEditedTimeProperty(EmptyLastEditedTimeProperty): - last_edited_time: DateTime - - -class DateData(BaseModel): - """ - https://developers.notion.com/reference/page-property-values#date - """ - - start: DateTime | Date - end: Optional[DateTime] = None - time_zone: Optional[Any] = None # TODO: Update type when structure is known - - -# Date property model -class EmptyDateProperty(PropertyBase): - type: Literal["date"] - date: Empty - - -class DateProperty(EmptyDateProperty): - date: Optional[DateData] = None - - -# Number property models -class EmptyNumberData(BaseModel): - format: Literal["number"] - - -class EmptyNumberProperty(PropertyBase): - type: Literal["number"] - number: EmptyNumberData - - -class NumberProperty(EmptyNumberProperty): - number: Optional[int | float] = None - - -# Select property models -class SelectOptionBase(BaseModel): - id: UUID - name: str - color: NamedColor - - -class EmptySelectOption(SelectOptionBase): - description: Optional[str] = None - - -class SelectOption(SelectOptionBase): - pass - - -class SelectData(BaseModel): - options: List[EmptySelectOption] - - -class SelectPropertyBase(PropertyBase): - type: Literal["select"] - - -class EmptySelectProperty(SelectPropertyBase): - select: SelectData - - -class SelectProperty(SelectPropertyBase): - select: Optional[SelectOption] = None - - -# Last edited by property model -class EmptyLastEditedByProperty(PropertyBase): - type: Literal["last_edited_by"] - last_edited_by: Empty - - -class LastEditedByProperty(EmptyLastEditedByProperty): - last_edited_by: User - - -# Title property model - - -class EmptyTitleProperty(PropertyBase): - type: Literal["title"] - title: Empty - - -class TitleProperty(EmptyTitleProperty): - title: List[TextItem] - - -class IdReference(BaseModel): - id: UUID - - -class RelationProperty(PropertyBase): - type: Literal["relation"] - relation: List[IdReference] - has_more: bool - - -# The discriminated union for Property using the "type" field as the discriminator -Property = Annotated[ - PeopleProperty - | RichTextProperty - | MultiSelectProperty - | LastEditedTimeProperty - | DateProperty - | NumberProperty - | SelectProperty - | LastEditedByProperty - | TitleProperty - | RelationProperty, - Field(discriminator="type"), -] - -# The discriminated union for GenericProperty using the "type" field as the discriminator -GenericProperty = Annotated[ - EmptyPeopleProperty - | EmptyRichTextProperty - | EmptyMultiSelectProperty - | EmptyLastEditedTimeProperty - | EmptyDateProperty - | EmptyNumberProperty - | EmptySelectProperty - | EmptyLastEditedByProperty - | EmptyTitleProperty, - Field(discriminator="type"), -] - - -class ObjectBase(BaseModel): - object: str # This field is used as the discriminator - id: UUID - cover: Optional[Any] = None # TODO: Update type when structure is known - icon: Optional[Any] = None # TODO: Update type when structure is known - created_time: DateTime - created_by: UserReference - last_edited_by: UserReference - last_edited_time: DateTime - parent: Reference - archived: bool - in_trash: bool - url: AnyUrl - public_url: Optional[AnyUrl] = None - - -class Page(ObjectBase): - object: Literal["page"] - properties: Dict[str, Property] - - -# Main model for the database object -class Database(ObjectBase): - object: Literal["database"] - title: List[TextItem] - description: List[Any] # TODO: Update type when structure is known - is_inline: bool - properties: Dict[str, GenericProperty] - request_id: UUID - - -NotionObject = Annotated[Page | Database, Field(discriminator="object")] diff --git a/dlt_source_notion/type_adapters.py b/dlt_source_notion/type_adapters.py deleted file mode 100644 index c8e8c24..0000000 --- a/dlt_source_notion/type_adapters.py +++ /dev/null @@ -1,7 +0,0 @@ -from pydantic import TypeAdapter - -from .model.notion_2022_06_28 import NotionObject, User - - -user_adapter = TypeAdapter(User) -object_adapter = TypeAdapter(NotionObject) diff --git a/pyproject.toml b/pyproject.toml index 80cabd4..83bc1aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,7 @@ readme = "README.md" requires-python = ">=3.12" dependencies = [ "dlt>=1.8.1", - "notion-client>=2.3.0", - "pydantic-extra-types>=2.10.3", + "pydantic-api-sdk-notion>=0.0.34", "pydantic>=2.10.6", ] dynamic = ["authors", "classifiers", "version", "description"] diff --git a/uv.lock b/uv.lock index cc9dea5..80e3345 100644 --- a/uv.lock +++ b/uv.lock @@ -252,9 +252,8 @@ version = "0.0.1" source = { virtual = "." } dependencies = [ { name = "dlt" }, - { name = "notion-client" }, { name = "pydantic" }, - { name = "pydantic-extra-types" }, + { name = "pydantic-api-sdk-notion" }, ] [package.optional-dependencies] @@ -273,9 +272,8 @@ dev = [ requires-dist = [ { name = "dlt", specifier = ">=1.8.1" }, { name = "dlt", extras = ["duckdb"], marker = "extra == 'show'", specifier = ">=1.8.1" }, - { name = "notion-client", specifier = ">=2.3.0" }, { name = "pydantic", specifier = ">=2.10.6" }, - { name = "pydantic-extra-types", specifier = ">=2.10.3" }, + { name = "pydantic-api-sdk-notion", specifier = ">=0.0.34" }, { name = "streamlit", marker = "extra == 'show'", specifier = ">=1.41.1" }, { name = "watchdog", marker = "extra == 'show'", specifier = ">=6.0.0" }, ] @@ -283,6 +281,15 @@ requires-dist = [ [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=8.3.4" }] +[[package]] +name = "dnspython" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/4a/263763cb2ba3816dd94b08ad3a33d5fdae34ecb856678773cc40a3605829/dnspython-2.7.0.tar.gz", hash = "sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1", size = 345197 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632 }, +] + [[package]] name = "duckdb" version = "1.2.1" @@ -307,6 +314,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/1c/4e29e52a35b5af451b24232b6f89714180da71c904017e62f7cc5477f135/duckdb-1.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:6112711457b6014ac041492bedf8b6a97403666aefa20a4a4f3479db10136501", size = 11365219 }, ] +[[package]] +name = "email-validator" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/ce/13508a1ec3f8bb981ae4ca79ea40384becc868bfae97fd1c942bb3a001b1/email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7", size = 48967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521 }, +] + +[[package]] +name = "emoji" +version = "2.14.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/7d/01cddcbb6f5cc0ba72e00ddf9b1fa206c802d557fd0a20b18e130edf1336/emoji-2.14.1.tar.gz", hash = "sha256:f8c50043d79a2c1410ebfae833ae1868d5941a67a6cd4d18377e2eb0bd79346b", size = 597182 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/db/a0335710caaa6d0aebdaa65ad4df789c15d89b7babd9a30277838a7d9aac/emoji-2.14.1-py3-none-any.whl", hash = "sha256:35a8a486c1460addb1499e3bf7929d3889b2e2841a57401903699fef595e942b", size = 590617 }, +] + [[package]] name = "fsspec" version = "2025.3.0" @@ -543,11 +572,11 @@ wheels = [ [[package]] name = "narwhals" -version = "1.30.0" +version = "1.31.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c4/98/be6d35e8869ab9403fa25dc3458e7af6ce36dac2873c74c7274a59b21958/narwhals-1.30.0.tar.gz", hash = "sha256:0c50cc67a5404da501302882838ec17dce51703d22cd8ad89162d6f60ea0bb19", size = 253461 } +sdist = { url = "https://files.pythonhosted.org/packages/36/fa/c2b6a4d5dbc4af15aa58c86920d5275a9c65142318179b246685069f57da/narwhals-1.31.0.tar.gz", hash = "sha256:333472e2562343dfdd27407ec9b5114a07c81d0416794e4ac6b703dd925c6a1a", size = 253463 } wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/97/bde1e4cf1e0fe0d4c70f750b57d152c0ecb04bb35de7aa7950a5756a71d6/narwhals-1.30.0-py3-none-any.whl", hash = "sha256:443aa0a1abfae89bc65a6b888a7e310a03d1818bfb2ccd61c150199a5f954c17", size = 313611 }, + { url = "https://files.pythonhosted.org/packages/f9/c0/fb39bd876ea2fd9509343d643690cd2f9715e6a77271e7c7b26f1eea70c1/narwhals-1.31.0-py3-none-any.whl", hash = "sha256:2a7b79bb5f511055c4c0142121fc0d4171ea171458e12d44dbd9c8fc6488e997", size = 313124 }, ] [[package]] @@ -830,6 +859,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f4/3c/8cc1cc84deffa6e25d2d0c688ebb80635dfdbf1dbea3e30c541c8cf4d860/pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584", size = 431696 }, ] +[package.optional-dependencies] +email = [ + { name = "email-validator" }, +] + +[[package]] +name = "pydantic-api-models" +version = "0.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic", extra = ["email"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/7b/11f67dc5956f0d10cd34625bdd1d9f79aa72400a7a6764001ad6251d66f6/pydantic_api_models-0.0.2.tar.gz", hash = "sha256:917c13220c06bd9239f28579fdb636364b5ec80c12aaa8009e17cbd5581a2332", size = 2034 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/9d/89a9d31a9c7003d2b7273fe961094f35e6649a416ec380266986a728a9fc/pydantic_api_models-0.0.2-py3-none-any.whl", hash = "sha256:cb5bbb7177e51a2022e4ffb503f5818a5549238e3441ad5054199f5d55d7bd56", size = 2674 }, +] + +[[package]] +name = "pydantic-api-models-notion" +version = "0.0.19" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "emoji" }, + { name = "pydantic-api-models" }, + { name = "uuid" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d4/1e/b23a26a06c708e5d31500060e03c775bb29eaa5d1d957b971a728e92345a/pydantic_api_models_notion-0.0.19.tar.gz", hash = "sha256:5d06a1e3eba4fc98bf8e983589653686d0be0e304efa575c3040aae7b13b8f53", size = 25848 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/16/0cf70129a9fba085f677f9b175df6d18a2db4106ed8ee489763b5c207cde/pydantic_api_models_notion-0.0.19-py3-none-any.whl", hash = "sha256:5790a36130d65892c8b169705be59f02a4096423c0bd305944325c155ec908c1", size = 39549 }, +] + +[[package]] +name = "pydantic-api-sdk-notion" +version = "0.0.34" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "notion-client" }, + { name = "pydantic-api-models-notion" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/9f/2ea01fdc086614060d6456c2c194d533ee2f4677ee513a3bcd85753e511d/pydantic_api_sdk_notion-0.0.34.tar.gz", hash = "sha256:7e2147924990cabac54dab1360569dc2177a409dc9183d6e6c1f5fda94ff6278", size = 10727 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/9d/943d37affef425181e93c088e5536336a41a7b893cfd86fb282a3b048034/pydantic_api_sdk_notion-0.0.34-py3-none-any.whl", hash = "sha256:4a188ceb8394b2a604ee816b63a762783aba3c28dea9e69b4a480adceca2c7cc", size = 15416 }, +] + [[package]] name = "pydantic-core" version = "2.27.2" @@ -869,19 +942,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/b2/b2b50d5ecf21acf870190ae5d093602d95f66c9c31f9d5de6062eb329ad1/pydantic_core-2.27.2-cp313-cp313-win_arm64.whl", hash = "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b", size = 1885186 }, ] -[[package]] -name = "pydantic-extra-types" -version = "2.10.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/53/fa/6b268a47839f8af46ffeb5bb6aee7bded44fbad54e6bf826c11f17aef91a/pydantic_extra_types-2.10.3.tar.gz", hash = "sha256:dcc0a7b90ac9ef1b58876c9b8fdede17fbdde15420de9d571a9fccde2ae175bb", size = 95128 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/38/0a/f6f8e5f79d188e2f3fa9ecfccfa72538b685985dd5c7c2886c67af70e685/pydantic_extra_types-2.10.3-py3-none-any.whl", hash = "sha256:e8b372752b49019cd8249cc192c62a820d8019f5382a8789d0f887338a59c0f3", size = 37175 }, -] - [[package]] name = "pydeck" version = "0.9.1" @@ -1104,11 +1164,11 @@ wheels = [ [[package]] name = "setuptools" -version = "76.0.0" +version = "76.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/32/d2/7b171caf085ba0d40d8391f54e1c75a1cda9255f542becf84575cfd8a732/setuptools-76.0.0.tar.gz", hash = "sha256:43b4ee60e10b0d0ee98ad11918e114c70701bc6051662a9a675a0496c1a158f4", size = 1349387 } +sdist = { url = "https://files.pythonhosted.org/packages/fa/2b/287ade3a580869e6178cb37d045f54272b1f006f2c0ff6fad08db258d027/setuptools-76.1.0.tar.gz", hash = "sha256:4959b9ad482ada2ba2320c8f1a8d8481d4d8d668908a7a1b84d987375cd7f5bd", size = 1350273 } wheels = [ - { url = "https://files.pythonhosted.org/packages/37/66/d2d7e6ad554f3a7c7297c3f8ef6e22643ad3d35ef5c63bf488bc89f32f31/setuptools-76.0.0-py3-none-any.whl", hash = "sha256:199466a166ff664970d0ee145839f5582cb9bca7a0a3a2e795b6a9cb2308e9c6", size = 1236106 }, + { url = "https://files.pythonhosted.org/packages/62/fb/47dc84839f2743553075c80d08543b3d0f498f42329141b6717504abcdfd/setuptools-76.1.0-py3-none-any.whl", hash = "sha256:34750dcb17d046929f545dec9b8349fe42bf4ba13ddffee78428aec422dbfb73", size = 1236933 }, ] [[package]] @@ -1319,6 +1379,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, ] +[[package]] +name = "uuid" +version = "1.30" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/63/f42f5aa951ebf2c8dac81f77a8edcc1c218640a2a35a03b9ff2d4aa64c3d/uuid-1.30.tar.gz", hash = "sha256:1f87cc004ac5120466f36c5beae48b4c48cc411968eed0eaecd3da82aa96193f", size = 5811 } + [[package]] name = "watchdog" version = "6.0.0" From 4efd5d525816327e3300e2cb363b67a5c09dcea8 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 14:18:25 +0000 Subject: [PATCH 05/15] wip(today's fortune): Save the Whales -- Harpoon a Honda. --- dlt_source_notion/__init__.py | 46 +++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/dlt_source_notion/__init__.py b/dlt_source_notion/__init__.py index 8cc0382..ea3128d 100644 --- a/dlt_source_notion/__init__.py +++ b/dlt_source_notion/__init__.py @@ -17,6 +17,7 @@ Page, PageProperty, ) +from dlt.common.normalizers.naming.snake_case import NamingConvention # from notion_client.helpers import iterate_paginated_api @@ -122,6 +123,8 @@ def split_user(users: List[UserObject]): page_property_adapter = TypeAdapter(PageProperty) +naming_convention = NamingConvention() + @dlt.resource( selected=True, @@ -132,6 +135,7 @@ def split_user(users: List[UserObject]): def database_resource( database_id: str, property_filter: Callable[[str], bool] = lambda _: True, + column_name_projection: Callable[[str], str] = lambda x: x, ) -> Iterable[Page]: client = get_notion_client() @@ -145,6 +149,18 @@ def database_resource( ] selected_properties = list(filter(property_filter, all_properties)) + target_key_mapping = { + p: naming_convention.normalize_path(column_name_projection(p)) + for p in selected_properties + } + target_keys = list(target_key_mapping.values()) + + if len(target_keys) != len(set(target_keys)): + raise ValueError( + "The column name projection function must produce unique column names. Current column names: " + + ", ".join(target_keys) + ) + for pages in iterate_paginated_api(client.databases.query, database_id=database_id): for page in pages: assert isinstance(page, Page) @@ -155,41 +171,41 @@ def database_resource( # TODO: remove this cast, once https://github.com/stevieflyer/pydantic-api-models-notion/pull/6 lands prop: PageProperty = page_property_adapter.validate_python(prop_raw) + target_key = target_key_mapping[selected_property] + match prop.type: case "title": - row[selected_property] = " ".join( - [t.text.content for t in prop.title] - ) + row[target_key] = " ".join([t.text.content for t in prop.title]) case "rich_text": - row[selected_property] = " ".join( + row[target_key] = " ".join( [t.text.content for t in prop.rich_text] ) case "number": - row[selected_property] = prop.number + row[target_key] = prop.number case "select": if prop.select is None: - row[selected_property] = None + row[target_key] = None continue - row[selected_property] = prop.select.id + row[target_key] = prop.select.id case "multi_select": - row[selected_property] = [s.id for s in prop.multi_select] + row[target_key] = [s.id for s in prop.multi_select] case "date": if prop.date is None: - row[selected_property] = None + row[target_key] = None continue if prop.date.end: # we have a range - row[selected_property] = prop.date + row[target_key] = prop.date else: - row[selected_property] = prop.date.start + row[target_key] = prop.date.start case "people": - row[selected_property] = [p.id for p in prop.people] + row[target_key] = [p.id for p in prop.people] case "last_edited_by": - row[selected_property] = prop.last_edited_by.id + row[target_key] = prop.last_edited_by.id case "last_edited_time": - row[selected_property] = prop.last_edited_time + row[target_key] = prop.last_edited_time case "relation": - row[selected_property] = [r.id for r in prop.relation] + row[target_key] = [r.id for r in prop.relation] case _: # See https://developers.notion.com/reference/page-property-values raise ValueError( From a0715d48f0b77d6d7bdf45364b17af9fa2e768a8 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 15:11:46 +0000 Subject: [PATCH 06/15] wip(today's fortune): Love is sentimental measles. --- dlt_source_notion/__init__.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/dlt_source_notion/__init__.py b/dlt_source_notion/__init__.py index ea3128d..acfc5e9 100644 --- a/dlt_source_notion/__init__.py +++ b/dlt_source_notion/__init__.py @@ -16,6 +16,8 @@ Database, Page, PageProperty, + # TODO: replace this with `BaseDatabaseProperty` when https://github.com/stevieflyer/pydantic-api-models-notion/pull/8 lands + DatabaseProperty ) from dlt.common.normalizers.naming.snake_case import NamingConvention @@ -134,23 +136,19 @@ def split_user(users: List[UserObject]): ) def database_resource( database_id: str, - property_filter: Callable[[str], bool] = lambda _: True, - column_name_projection: Callable[[str], str] = lambda x: x, + property_filter: Callable[[DatabaseProperty], bool] = lambda _: True, + column_name_projection: Callable[[DatabaseProperty], str] = lambda x: naming_convention.normalize_path(x.name), ) -> Iterable[Page]: client = get_notion_client() db: Database = client.databases.retrieve(database_id=database_id) - all_properties = [ - p.name - for p in db.properties.values() - if p.name is not None and isinstance(p.name, str) - ] + all_properties = list(db.properties.values()) selected_properties = list(filter(property_filter, all_properties)) target_key_mapping = { - p: naming_convention.normalize_path(column_name_projection(p)) + p.name: column_name_projection(p) for p in selected_properties } target_keys = list(target_key_mapping.values()) @@ -167,11 +165,12 @@ def database_resource( row = {} for selected_property in selected_properties: - prop_raw = page.properties[selected_property] + selected_key = selected_property.name + prop_raw = page.properties[selected_key] # TODO: remove this cast, once https://github.com/stevieflyer/pydantic-api-models-notion/pull/6 lands prop: PageProperty = page_property_adapter.validate_python(prop_raw) - target_key = target_key_mapping[selected_property] + target_key = target_key_mapping[selected_key] match prop.type: case "title": From 5f1e6759d4c33f6c2c8c3432a0fe1269b77aacb7 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 15:34:43 +0000 Subject: [PATCH 07/15] wip(today's fortune): An apple a day makes 365 apples a year. --- README.md | 2 ++ dlt_source_notion/__init__.py | 22 ++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1673d80..11652ee 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ has a few drawbacks: Once you make changes to it, it effectively becomes a fork, making it hard to update after the fact. - This makes use of a preexisting client implementation +- And Pydantic types for Notion entities, + which makes this implementation a lot more stable ## Usage diff --git a/dlt_source_notion/__init__.py b/dlt_source_notion/__init__.py index acfc5e9..7d7b701 100644 --- a/dlt_source_notion/__init__.py +++ b/dlt_source_notion/__init__.py @@ -17,7 +17,7 @@ Page, PageProperty, # TODO: replace this with `BaseDatabaseProperty` when https://github.com/stevieflyer/pydantic-api-models-notion/pull/8 lands - DatabaseProperty + DatabaseProperty, ) from dlt.common.normalizers.naming.snake_case import NamingConvention @@ -48,6 +48,7 @@ def pydantic_model_dump(model: BaseModel, **kwargs): class Table(StrEnum): PERSONS = "persons" BOTS = "bots" + DATABASES = "databases" def use_id(entity: UserObject, **kwargs) -> dict: @@ -137,19 +138,32 @@ def split_user(users: List[UserObject]): def database_resource( database_id: str, property_filter: Callable[[DatabaseProperty], bool] = lambda _: True, - column_name_projection: Callable[[DatabaseProperty], str] = lambda x: naming_convention.normalize_path(x.name), + column_name_projection: Callable[ + [DatabaseProperty], str + ] = lambda x: naming_convention.normalize_path(x.name), ) -> Iterable[Page]: client = get_notion_client() db: Database = client.databases.retrieve(database_id=database_id) + yield dlt.mark.with_hints( + item={"title": db.plain_text_title} + | use_id(db, exclude=["object", "properties", "title"]), + hints=dlt.mark.make_hints( + table_name=Table.DATABASES.value, + primary_key="id", + write_disposition="merge", + ), + # needs to be a variant due to https://github.com/dlt-hub/dlt/pull/2109 + create_table_variant=True, + ) + all_properties = list(db.properties.values()) selected_properties = list(filter(property_filter, all_properties)) target_key_mapping = { - p.name: column_name_projection(p) - for p in selected_properties + p.name: column_name_projection(p) for p in selected_properties } target_keys = list(target_key_mapping.values()) From 978db69af2a8af2b6748c3a5c5884a81bd4f73b1 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 20:09:52 +0000 Subject: [PATCH 08/15] wip(today's fortune): Individualists unite! --- README.md | 6 ++ dlt_source_notion/__init__.py | 125 +++++++++++++++++++++++++++------- notion_pipeline.py | 3 +- 3 files changed, 107 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 11652ee..06817a6 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,12 @@ Currently loads the following data: | -- | -- | | `persons` | Items of the `user` model of type `person` | | `bots` | Items of the `user` model of type `bot` | +| `database__` | The database content (pages) of a given database. Columns are all global page columns + selected columns of properties | +| `databases` | All metadata of each loaded database. Has the title and a reference to the database table as well. | +| `options__` | All options of a `select` or `multi_select` in one of the loaded databases. | + +`` refers to the hashed `id` of an entity. +The hash is a 4-bit BLAKE2b hash. ## Why are you not using the `dlt-hub/verified-sources` notion source / Differences diff --git a/dlt_source_notion/__init__.py b/dlt_source_notion/__init__.py index 7d7b701..a2c6fd4 100644 --- a/dlt_source_notion/__init__.py +++ b/dlt_source_notion/__init__.py @@ -3,8 +3,9 @@ from enum import StrEnum import json from typing import Any, Callable, Generator, Iterable, List, Sequence, TypeVar +from uuid import UUID import dlt -from pydantic import TypeAdapter +from pydantic import Field, TypeAdapter from dlt.common import json from dlt.common.json import JsonSerializable @@ -17,16 +18,26 @@ Page, PageProperty, # TODO: replace this with `BaseDatabaseProperty` when https://github.com/stevieflyer/pydantic-api-models-notion/pull/8 lands - DatabaseProperty, + DatabaseProperty as BaseDatabaseProperty, + SelectOption, + # MultiSelectPropertyConfig, + # SelectPropertyConfig, ) from dlt.common.normalizers.naming.snake_case import NamingConvention - # from notion_client.helpers import iterate_paginated_api from pydantic import AnyUrl, BaseModel from .client import get_notion_client +import hashlib + + +def short_hash(input: str | UUID, digest_size: int = 4) -> str: + # Using BLAKE2b with an x-byte digest (64 bits) + h = hashlib.blake2b(str(input).encode(), digest_size=digest_size) + return h.hexdigest() + def anyurl_encoder(obj: Any) -> JsonSerializable: if isinstance(obj, AnyUrl): @@ -128,6 +139,13 @@ def split_user(users: List[UserObject]): naming_convention = NamingConvention() +DatabaseProperty = BaseDatabaseProperty + +ColumnNameProjection = Callable[[DatabaseProperty, Callable[[str], str]], str | None] +""" +A function that determines the resulting column name for a given property. Return `None` to exclude the property. Fails if the resulting column names are not unique. +""" + @dlt.resource( selected=True, @@ -137,18 +155,19 @@ def split_user(users: List[UserObject]): ) def database_resource( database_id: str, - property_filter: Callable[[DatabaseProperty], bool] = lambda _: True, - column_name_projection: Callable[ - [DatabaseProperty], str - ] = lambda x: naming_convention.normalize_path(x.name), + column_name_projection: ColumnNameProjection, ) -> Iterable[Page]: client = get_notion_client() db: Database = client.databases.retrieve(database_id=database_id) + db_table_name = naming_convention.normalize_path( + "database_" + db.plain_text_title + "_" + short_hash(db.id) + ) + yield dlt.mark.with_hints( - item={"title": db.plain_text_title} + item={"title": db.plain_text_title, "db_table_name": db_table_name} | use_id(db, exclude=["object", "properties", "title"]), hints=dlt.mark.make_hints( table_name=Table.DATABASES.value, @@ -160,17 +179,38 @@ def database_resource( ) all_properties = list(db.properties.values()) - selected_properties = list(filter(property_filter, all_properties)) + + for p in all_properties: + if p.type not in ["multi_select", "select"]: + continue + + # data: MultiSelectPropertyConfig | SelectPropertyConfig = getattr(p, p.type) + data = getattr(p, p.type) + if data is None: + continue + for option in data.options: + yield dlt.mark.with_hints( + item=use_id(option, exclude=["object", "color"]), + hints=dlt.mark.make_hints( + table_name="options_" + p.name + "_" + short_hash(p.id), + primary_key="id", + write_disposition="merge", + ), + ) target_key_mapping = { - p.name: column_name_projection(p) for p in selected_properties + p.name: proj + for p in all_properties + if (proj := column_name_projection(p, naming_convention.normalize_path)) + is not None } - target_keys = list(target_key_mapping.values()) + target_column_names = list(target_key_mapping.values()) + selected_properties = list(target_key_mapping.keys()) - if len(target_keys) != len(set(target_keys)): + if len(target_column_names) != len(set(target_column_names)): raise ValueError( "The column name projection function must produce unique column names. Current column names: " - + ", ".join(target_keys) + + ", ".join(target_column_names) ) for pages in iterate_paginated_api(client.databases.query, database_id=database_id): @@ -178,8 +218,7 @@ def database_resource( assert isinstance(page, Page) row = {} - for selected_property in selected_properties: - selected_key = selected_property.name + for selected_key in selected_properties: prop_raw = page.properties[selected_key] # TODO: remove this cast, once https://github.com/stevieflyer/pydantic-api-models-notion/pull/6 lands prop: PageProperty = page_property_adapter.validate_python(prop_raw) @@ -188,9 +227,9 @@ def database_resource( match prop.type: case "title": - row[target_key] = " ".join([t.text.content for t in prop.title]) + row[target_key] = "".join([t.text.content for t in prop.title]) case "rich_text": - row[target_key] = " ".join( + row[target_key] = "".join( [t.text.content for t in prop.rich_text] ) case "number": @@ -199,9 +238,11 @@ def database_resource( if prop.select is None: row[target_key] = None continue - row[target_key] = prop.select.id + row[target_key + "_" + short_hash(prop.id)] = prop.select.id case "multi_select": - row[target_key] = [s.id for s in prop.multi_select] + row[target_key + "_" + short_hash(prop.id)] = [ + s.id for s in prop.multi_select + ] case "date": if prop.date is None: row[target_key] = None @@ -212,35 +253,67 @@ def database_resource( else: row[target_key] = prop.date.start case "people": - row[target_key] = [p.id for p in prop.people] + row[target_key + "_users"] = [p.id for p in prop.people] case "last_edited_by": row[target_key] = prop.last_edited_by.id case "last_edited_time": row[target_key] = prop.last_edited_time case "relation": - row[target_key] = [r.id for r in prop.relation] + row[target_key + "_relations"] = [r.id for r in prop.relation] case _: # See https://developers.notion.com/reference/page-property-values raise ValueError( f"Unsupported property type: {prop.type}; Please open a pull request." ) - yield use_id(page, exclude=["properties", "object"]) | row + yield dlt.mark.with_hints( + item=use_id(page, exclude=["properties"]) | row, + hints=dlt.mark.make_hints( + table_name=db_table_name, + primary_key="id", + write_disposition="merge", + ), + # needs to be a variant due to https://github.com/dlt-hub/dlt/pull/2109 + create_table_variant=True, + ) + + +class DatabaseResourceBase: + column_name_projection: ColumnNameProjection = lambda x, normalize: normalize( + x.name + ) + + +class DatabaseResource(DatabaseResourceBase): + def __init__( + self, database_id: str, column_name_projection: ColumnNameProjection = None + ): + self.database_id = database_id + if column_name_projection is not None: + self.column_name_projection = column_name_projection + + def get_resource(self): + return database_resource( + database_id=self.database_id, + column_name_projection=self.column_name_projection, + ) + + def __str__(self): + return f"DatabaseResource(database_id={self.database_id})" @dlt.source(name="notion") def source( limit: int = -1, + database_resources: List[DatabaseResource] = Field(default_factory=list), ) -> Sequence[DltResource]: users = list_users() if limit != -1: users.add_limit(limit) - db_rs = database_resource(database_id="...") - return ( users | split_user, - db_rs, + *[d.get_resource() for d in database_resources], ) -__all__ = ["source", "database_resource"] +__all__ = ["source", "DatabaseResource", "ColumnNameProjection", "DatabaseProperty"] diff --git a/notion_pipeline.py b/notion_pipeline.py index d8a4e0f..41096b9 100644 --- a/notion_pipeline.py +++ b/notion_pipeline.py @@ -1,5 +1,6 @@ +from typing import Callable import dlt -from dlt_source_notion import source +from dlt_source_notion import source, DatabaseResource, DatabaseProperty DEV_MODE = True From 90d51f2707225e7b695ac16684c6a4971a32bbc0 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 20:23:37 +0000 Subject: [PATCH 09/15] wip(today's fortune): May I ask a question? --- README.md | 1 + dlt_source_notion/__init__.py | 28 +++++++++++++++++++--------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 06817a6..90792dc 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Currently loads the following data: | `bots` | Items of the `user` model of type `bot` | | `database__` | The database content (pages) of a given database. Columns are all global page columns + selected columns of properties | | `databases` | All metadata of each loaded database. Has the title and a reference to the database table as well. | +| `databases__properties` | Column \<-> Label mapping for each unfiltered database property | | `options__` | All options of a `select` or `multi_select` in one of the loaded databases. | `` refers to the hashed `id` of an entity. diff --git a/dlt_source_notion/__init__.py b/dlt_source_notion/__init__.py index a2c6fd4..3ce85c8 100644 --- a/dlt_source_notion/__init__.py +++ b/dlt_source_notion/__init__.py @@ -166,8 +166,26 @@ def database_resource( "database_" + db.plain_text_title + "_" + short_hash(db.id) ) + all_properties = list(db.properties.values()) + + target_key_mapping = { + p.name: proj + for p in all_properties + if (proj := column_name_projection(p, naming_convention.normalize_path)) + is not None + } + + properties = [ + {"column": column, "label": label} + for label, column in target_key_mapping.items() + ] + yield dlt.mark.with_hints( - item={"title": db.plain_text_title, "db_table_name": db_table_name} + item={ + "title": db.plain_text_title, + "db_table_name": db_table_name, + "properties": properties, + } | use_id(db, exclude=["object", "properties", "title"]), hints=dlt.mark.make_hints( table_name=Table.DATABASES.value, @@ -178,8 +196,6 @@ def database_resource( create_table_variant=True, ) - all_properties = list(db.properties.values()) - for p in all_properties: if p.type not in ["multi_select", "select"]: continue @@ -198,12 +214,6 @@ def database_resource( ), ) - target_key_mapping = { - p.name: proj - for p in all_properties - if (proj := column_name_projection(p, naming_convention.normalize_path)) - is not None - } target_column_names = list(target_key_mapping.values()) selected_properties = list(target_key_mapping.keys()) From 3145f78bd5dbeedd036b4f85b14938a58def1dad Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 21:12:30 +0000 Subject: [PATCH 10/15] wip(today's fortune): Yow! Now we can become alcoholics! --- devenv.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/devenv.lock b/devenv.lock index 3e64290..338fc77 100644 --- a/devenv.lock +++ b/devenv.lock @@ -3,10 +3,10 @@ "devenv": { "locked": { "dir": "src/modules", - "lastModified": 1741068816, + "lastModified": 1742320965, "owner": "cachix", "repo": "devenv", - "rev": "9f6da63c162ad86b6fb84edcbd8c447fdc411c3d", + "rev": "6bde92766ddd3ee1630029a03d36baddd51934e2", "type": "github" }, "original": { @@ -55,10 +55,10 @@ ] }, "locked": { - "lastModified": 1740915799, + "lastModified": 1742300892, "owner": "cachix", "repo": "git-hooks.nix", - "rev": "42b1ba089d2034d910566bf6b40830af6b8ec732", + "rev": "ea26a82dda75bee6783baca6894040c8e6599728", "type": "github" }, "original": { From 1370ed68f097472e0d96e123509b6b2a22bea4c2 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 21:26:15 +0000 Subject: [PATCH 11/15] wip(today's fortune): Real Users hate Real Programmers. --- uv.lock | 7 ------- 1 file changed, 7 deletions(-) diff --git a/uv.lock b/uv.lock index 80e3345..3b1a641 100644 --- a/uv.lock +++ b/uv.lock @@ -883,7 +883,6 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "emoji" }, { name = "pydantic-api-models" }, - { name = "uuid" }, ] sdist = { url = "https://files.pythonhosted.org/packages/d4/1e/b23a26a06c708e5d31500060e03c775bb29eaa5d1d957b971a728e92345a/pydantic_api_models_notion-0.0.19.tar.gz", hash = "sha256:5d06a1e3eba4fc98bf8e983589653686d0be0e304efa575c3040aae7b13b8f53", size = 25848 } wheels = [ @@ -1379,12 +1378,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, ] -[[package]] -name = "uuid" -version = "1.30" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ce/63/f42f5aa951ebf2c8dac81f77a8edcc1c218640a2a35a03b9ff2d4aa64c3d/uuid-1.30.tar.gz", hash = "sha256:1f87cc004ac5120466f36c5beae48b4c48cc411968eed0eaecd3da82aa96193f", size = 5811 } - [[package]] name = "watchdog" version = "6.0.0" From 3b9db6db048b819b8200a1a883fcc7b79a670bde Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 19 Mar 2025 21:38:03 +0000 Subject: [PATCH 12/15] wip(today's fortune): You will triumph over your enemy. --- dlt_source_notion/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dlt_source_notion/__init__.py b/dlt_source_notion/__init__.py index 3ce85c8..ae49933 100644 --- a/dlt_source_notion/__init__.py +++ b/dlt_source_notion/__init__.py @@ -19,7 +19,6 @@ PageProperty, # TODO: replace this with `BaseDatabaseProperty` when https://github.com/stevieflyer/pydantic-api-models-notion/pull/8 lands DatabaseProperty as BaseDatabaseProperty, - SelectOption, # MultiSelectPropertyConfig, # SelectPropertyConfig, ) From 56b12720e6990e973a829854546c21e190bb2a35 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Thu, 20 Mar 2025 09:08:42 +0000 Subject: [PATCH 13/15] wip(today's fortune): You are always busy. --- notion_pipeline.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/notion_pipeline.py b/notion_pipeline.py index 41096b9..a8cdd28 100644 --- a/notion_pipeline.py +++ b/notion_pipeline.py @@ -9,8 +9,25 @@ def load_notion_data() -> None: pipeline = dlt.pipeline( pipeline_name="notion_pipeline", destination="duckdb", dev_mode=DEV_MODE ) + + def column_name_projection( + prop: DatabaseProperty, normalize: Callable[[str], str] + ) -> str: + result_name = normalize(prop.name) + if result_name in [ + "my_column_name", + ]: + return None + return result_name + + my_db = DatabaseResource( + database_id="12345678912345678912345678912345", + column_name_projection=column_name_projection, + ) + data = source( limit=-1 if not DEV_MODE else 1, + databases=[my_db], ) info = pipeline.run( data, From 4da0028e8039fd07138a97c61e7b0cd4705667bb Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Thu, 20 Mar 2025 11:26:04 +0000 Subject: [PATCH 14/15] wip(today's fortune): Contest void where prohibited by law. --- devenv.lock | 16 ++++++++++++++++ devenv.nix | 6 +++++- devenv.yaml | 2 ++ notion_pipeline.py | 2 +- pyproject.toml | 4 ++++ uv.lock | 45 ++++++++++++++++++++++----------------------- 6 files changed, 50 insertions(+), 25 deletions(-) diff --git a/devenv.lock b/devenv.lock index 338fc77..f3b7c2b 100644 --- a/devenv.lock +++ b/devenv.lock @@ -122,12 +122,28 @@ "type": "github" } }, + "nixpkgs-unstable": { + "locked": { + "lastModified": 1742272065, + "owner": "nixos", + "repo": "nixpkgs", + "rev": "3549532663732bfd89993204d40543e9edaec4f2", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, "root": { "inputs": { "devenv": "devenv", "git-hooks": "git-hooks", "nixpkgs": "nixpkgs", "nixpkgs-python": "nixpkgs-python", + "nixpkgs-unstable": "nixpkgs-unstable", "pre-commit-hooks": [ "git-hooks" ] diff --git a/devenv.nix b/devenv.nix index 6ad2153..c9e78f1 100644 --- a/devenv.nix +++ b/devenv.nix @@ -6,15 +6,19 @@ ... }: +let + pkgs-unstable = import inputs.nixpkgs-unstable { system = pkgs.stdenv.system; }; +in { packages = [ pkgs.git pkgs.bash + pkgs.python312Packages.setuptools ]; languages.python.enable = true; languages.python.uv.enable = true; - languages.python.uv.package = pkgs.uv; + languages.python.uv.package = pkgs-unstable.python312Packages.uv; languages.python.uv.sync.enable = true; languages.python.uv.sync.allExtras = true; languages.python.venv.enable = true; diff --git a/devenv.yaml b/devenv.yaml index 184b866..8a88bd9 100644 --- a/devenv.yaml +++ b/devenv.yaml @@ -1,6 +1,8 @@ inputs: nixpkgs: url: github:cachix/devenv-nixpkgs/rolling + nixpkgs-unstable: + url: github:nixos/nixpkgs/nixpkgs-unstable nixpkgs-python: url: github:cachix/nixpkgs-python inputs: diff --git a/notion_pipeline.py b/notion_pipeline.py index a8cdd28..9afaa61 100644 --- a/notion_pipeline.py +++ b/notion_pipeline.py @@ -27,7 +27,7 @@ def column_name_projection( data = source( limit=-1 if not DEV_MODE else 1, - databases=[my_db], + database_resources=[my_db], ) info = pipeline.run( data, diff --git a/pyproject.toml b/pyproject.toml index 83bc1aa..f8536c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ dependencies = [ "dlt>=1.8.1", "pydantic-api-sdk-notion>=0.0.34", "pydantic>=2.10.6", + "pydantic-api-models-notion", ] dynamic = ["authors", "classifiers", "version", "description"] @@ -27,3 +28,6 @@ include = '.*py$' [tool.autoflake] remove-all-unused-imports = true remove-unused-variables = true + +[tool.uv.sources] +pydantic-api-models-notion = { git = "https://github.com/stevieflyer/pydantic-api-models-notion.git", rev = "050999bc3817422a11edb79835dcd1b48314ab04" } \ No newline at end of file diff --git a/uv.lock b/uv.lock index 3b1a641..13401e1 100644 --- a/uv.lock +++ b/uv.lock @@ -1,8 +1,9 @@ version = 1 +revision = 1 requires-python = ">=3.12" resolution-markers = [ - "python_full_version < '3.13'", "python_full_version >= '3.13'", + "python_full_version < '3.13'", ] [[package]] @@ -153,7 +154,7 @@ name = "click" version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } wheels = [ @@ -248,11 +249,11 @@ wheels = [ [[package]] name = "dlt-source-notion" -version = "0.0.1" source = { virtual = "." } dependencies = [ { name = "dlt" }, { name = "pydantic" }, + { name = "pydantic-api-models-notion" }, { name = "pydantic-api-sdk-notion" }, ] @@ -273,10 +274,12 @@ requires-dist = [ { name = "dlt", specifier = ">=1.8.1" }, { name = "dlt", extras = ["duckdb"], marker = "extra == 'show'", specifier = ">=1.8.1" }, { name = "pydantic", specifier = ">=2.10.6" }, + { name = "pydantic-api-models-notion", git = "https://github.com/stevieflyer/pydantic-api-models-notion.git?rev=050999bc3817422a11edb79835dcd1b48314ab04" }, { name = "pydantic-api-sdk-notion", specifier = ">=0.0.34" }, { name = "streamlit", marker = "extra == 'show'", specifier = ">=1.41.1" }, { name = "watchdog", marker = "extra == 'show'", specifier = ">=6.0.0" }, ] +provides-extras = ["show"] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=8.3.4" }] @@ -444,11 +447,11 @@ wheels = [ [[package]] name = "iniconfig" -version = "2.0.0" +version = "2.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, ] [[package]] @@ -796,16 +799,16 @@ wheels = [ [[package]] name = "protobuf" -version = "5.29.3" +version = "5.29.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f7/d1/e0a911544ca9993e0f17ce6d3cc0932752356c1b0a834397f28e63479344/protobuf-5.29.3.tar.gz", hash = "sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620", size = 424945 } +sdist = { url = "https://files.pythonhosted.org/packages/17/7d/b9dca7365f0e2c4fa7c193ff795427cfa6290147e5185ab11ece280a18e7/protobuf-5.29.4.tar.gz", hash = "sha256:4f1dfcd7997b31ef8f53ec82781ff434a28bf71d9102ddde14d076adcfc78c99", size = 424902 } wheels = [ - { url = "https://files.pythonhosted.org/packages/dc/7a/1e38f3cafa022f477ca0f57a1f49962f21ad25850c3ca0acd3b9d0091518/protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888", size = 422708 }, - { url = "https://files.pythonhosted.org/packages/61/fa/aae8e10512b83de633f2646506a6d835b151edf4b30d18d73afd01447253/protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a", size = 434508 }, - { url = "https://files.pythonhosted.org/packages/dd/04/3eaedc2ba17a088961d0e3bd396eac764450f431621b58a04ce898acd126/protobuf-5.29.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e", size = 417825 }, - { url = "https://files.pythonhosted.org/packages/4f/06/7c467744d23c3979ce250397e26d8ad8eeb2bea7b18ca12ad58313c1b8d5/protobuf-5.29.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84", size = 319573 }, - { url = "https://files.pythonhosted.org/packages/a8/45/2ebbde52ad2be18d3675b6bee50e68cd73c9e0654de77d595540b5129df8/protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f", size = 319672 }, - { url = "https://files.pythonhosted.org/packages/fd/b2/ab07b09e0f6d143dfb839693aa05765257bceaa13d03bf1a696b78323e7a/protobuf-5.29.3-py3-none-any.whl", hash = "sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f", size = 172550 }, + { url = "https://files.pythonhosted.org/packages/9a/b2/043a1a1a20edd134563699b0e91862726a0dc9146c090743b6c44d798e75/protobuf-5.29.4-cp310-abi3-win32.whl", hash = "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7", size = 422709 }, + { url = "https://files.pythonhosted.org/packages/79/fc/2474b59570daa818de6124c0a15741ee3e5d6302e9d6ce0bdfd12e98119f/protobuf-5.29.4-cp310-abi3-win_amd64.whl", hash = "sha256:bcefcdf3976233f8a502d265eb65ea740c989bacc6c30a58290ed0e519eb4b8d", size = 434506 }, + { url = "https://files.pythonhosted.org/packages/46/de/7c126bbb06aa0f8a7b38aaf8bd746c514d70e6a2a3f6dd460b3b7aad7aae/protobuf-5.29.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:307ecba1d852ec237e9ba668e087326a67564ef83e45a0189a772ede9e854dd0", size = 417826 }, + { url = "https://files.pythonhosted.org/packages/a2/b5/bade14ae31ba871a139aa45e7a8183d869efe87c34a4850c87b936963261/protobuf-5.29.4-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:aec4962f9ea93c431d5714ed1be1c93f13e1a8618e70035ba2b0564d9e633f2e", size = 319574 }, + { url = "https://files.pythonhosted.org/packages/46/88/b01ed2291aae68b708f7d334288ad5fb3e7aa769a9c309c91a0d55cb91b0/protobuf-5.29.4-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:d7d3f7d1d5a66ed4942d4fefb12ac4b14a29028b209d4bfb25c68ae172059922", size = 319672 }, + { url = "https://files.pythonhosted.org/packages/12/fb/a586e0c973c95502e054ac5f81f88394f24ccc7982dac19c515acd9e2c93/protobuf-5.29.4-py3-none-any.whl", hash = "sha256:3fde11b505e1597f71b875ef2fc52062b6a9740e5f7c8997ce878b6009145862", size = 172551 }, ] [[package]] @@ -879,15 +882,11 @@ wheels = [ [[package]] name = "pydantic-api-models-notion" version = "0.0.19" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/stevieflyer/pydantic-api-models-notion.git?rev=050999bc3817422a11edb79835dcd1b48314ab04#050999bc3817422a11edb79835dcd1b48314ab04" } dependencies = [ { name = "emoji" }, { name = "pydantic-api-models" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d4/1e/b23a26a06c708e5d31500060e03c775bb29eaa5d1d957b971a728e92345a/pydantic_api_models_notion-0.0.19.tar.gz", hash = "sha256:5d06a1e3eba4fc98bf8e983589653686d0be0e304efa575c3040aae7b13b8f53", size = 25848 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/16/0cf70129a9fba085f677f9b175df6d18a2db4106ed8ee489763b5c207cde/pydantic_api_models_notion-0.0.19-py3-none-any.whl", hash = "sha256:5790a36130d65892c8b169705be59f02a4096423c0bd305944325c155ec908c1", size = 39549 }, -] [[package]] name = "pydantic-api-sdk-notion" @@ -1163,11 +1162,11 @@ wheels = [ [[package]] name = "setuptools" -version = "76.1.0" +version = "77.0.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fa/2b/287ade3a580869e6178cb37d045f54272b1f006f2c0ff6fad08db258d027/setuptools-76.1.0.tar.gz", hash = "sha256:4959b9ad482ada2ba2320c8f1a8d8481d4d8d668908a7a1b84d987375cd7f5bd", size = 1350273 } +sdist = { url = "https://files.pythonhosted.org/packages/ea/df/9f719dc48f64284be8bd99e2e0bb0dd6e9f8e2c2c3c7bf7a685bc5adf2c7/setuptools-77.0.1.tar.gz", hash = "sha256:a1246a1b4178c66d7cf50c9fc6d530fac3f89bc284cf803c7fa878c41b1a03b2", size = 1366225 } wheels = [ - { url = "https://files.pythonhosted.org/packages/62/fb/47dc84839f2743553075c80d08543b3d0f498f42329141b6717504abcdfd/setuptools-76.1.0-py3-none-any.whl", hash = "sha256:34750dcb17d046929f545dec9b8349fe42bf4ba13ddffee78428aec422dbfb73", size = 1236933 }, + { url = "https://files.pythonhosted.org/packages/40/50/bc3d02829a3babd70b7f1414c93cf6acd198976f0469a07d0e7b813c5002/setuptools-77.0.1-py3-none-any.whl", hash = "sha256:81a234dff81a82bb52e522c8aef145d0dd4de1fd6de4d3b196d0f77dc2fded26", size = 1254282 }, ] [[package]] @@ -1254,7 +1253,7 @@ dependencies = [ { name = "toml" }, { name = "tornado" }, { name = "typing-extensions" }, - { name = "watchdog", marker = "platform_system != 'Darwin'" }, + { name = "watchdog", marker = "sys_platform != 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/53/c6/c2a5a9037b583d7e48c8fe4ea07f72f59c1796c57f7c01f907a0a388e40a/streamlit-1.43.2.tar.gz", hash = "sha256:f3afa2af637d00154c6a4c560d2fde256d7dc8cc1f32a53cf20570c0967841bc", size = 9345475 } wheels = [ From ea97a02dbc9e8457b100bdb00987de4d5435fca8 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Thu, 20 Mar 2025 11:40:20 +0000 Subject: [PATCH 15/15] wip(today's fortune): Eschew obfuscation. --- dlt_source_notion/__init__.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/dlt_source_notion/__init__.py b/dlt_source_notion/__init__.py index ae49933..6e8442c 100644 --- a/dlt_source_notion/__init__.py +++ b/dlt_source_notion/__init__.py @@ -17,10 +17,9 @@ Database, Page, PageProperty, - # TODO: replace this with `BaseDatabaseProperty` when https://github.com/stevieflyer/pydantic-api-models-notion/pull/8 lands - DatabaseProperty as BaseDatabaseProperty, - # MultiSelectPropertyConfig, - # SelectPropertyConfig, + BaseDatabaseProperty, + MultiSelectPropertyConfig, + SelectPropertyConfig, ) from dlt.common.normalizers.naming.snake_case import NamingConvention @@ -199,10 +198,7 @@ def database_resource( if p.type not in ["multi_select", "select"]: continue - # data: MultiSelectPropertyConfig | SelectPropertyConfig = getattr(p, p.type) - data = getattr(p, p.type) - if data is None: - continue + data: MultiSelectPropertyConfig | SelectPropertyConfig = getattr(p, p.type) for option in data.options: yield dlt.mark.with_hints( item=use_id(option, exclude=["object", "color"]), @@ -228,10 +224,7 @@ def database_resource( row = {} for selected_key in selected_properties: - prop_raw = page.properties[selected_key] - # TODO: remove this cast, once https://github.com/stevieflyer/pydantic-api-models-notion/pull/6 lands - prop: PageProperty = page_property_adapter.validate_python(prop_raw) - + prop = page.properties[selected_key] target_key = target_key_mapping[selected_key] match prop.type: