Skip to content

Commit

Permalink
Add support for ClassVar variables (#227)
Browse files Browse the repository at this point in the history
  • Loading branch information
alwaysmpe authored Jun 26, 2024
1 parent 1934cb4 commit 59ea0e1
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 1 deletion.
32 changes: 32 additions & 0 deletions docs/source/advanced.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,38 @@
"df.dtypes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ClassVar variables\n",
"\n",
"Variables annotated with `typing.ClassVar` are not included in the schema, so they can be used, for example, to store metadata about the DataSet."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import ClassVar\n",
"\n",
"class Schema:\n",
" id: int\n",
" name: str\n",
" file_name: ClassVar[str] = \"schema_data.csv\"\n",
"\n",
"df1 = DataSet[Schema](\n",
" {\n",
" \"id\": [1, 2, 3],\n",
" \"name\": [\"John\", \"Jane\", \"Jack\"],\n",
" }\n",
")\n",
"\n",
"print(Schema.file_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
10 changes: 9 additions & 1 deletion strictly_typed_pandas/validate_schema.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, Set
from typing import Any, ClassVar, Dict, Set, get_origin

import numpy as np # type: ignore
from pandas.api.extensions import ExtensionDtype
Expand All @@ -14,7 +14,15 @@ def check_for_duplicate_columns(names_index: Set[str], names_data: Set[str]) ->
raise TypeError(msg.format(intersection))


def remove_classvars(schema_expected: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``schema_expected`` without ``ClassVar`` entries.

    Both the subscripted form (``ClassVar[str]``) and the bare form
    (``ClassVar``) are dropped. The input mapping is not modified.

    Args:
        schema_expected: Mapping of attribute name to its annotation.

    Returns:
        A new dict holding only the entries whose annotation is not a
        ``ClassVar``.
    """
    return {
        name: annotation
        for name, annotation in schema_expected.items()
        # A bare ``ClassVar`` has no origin (``get_origin`` returns ``None``),
        # so it must be matched by identity in addition to the subscripted form.
        if annotation is not ClassVar and get_origin(annotation) is not ClassVar
    }


def validate_schema(schema_expected: Dict[str, Any], schema_observed: Dict[str, Any]):
    """Check the observed schema against the expected one.

    ``ClassVar`` entries are stripped from ``schema_expected`` first, so they
    take no part in the name or dtype checks.

    Args:
        schema_expected: Mapping of attribute name to annotation, as declared
            on the schema.
        schema_observed: Mapping of column name to the observed type.
    """
    expected_columns = remove_classvars(schema_expected)
    expected_names = set(expected_columns)
    observed_names = set(schema_observed)
    _check_names(expected_names, observed_names)
    _check_dtypes(expected_columns, schema_observed)

Expand Down
15 changes: 15 additions & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pickle
import tempfile
from typing import ClassVar

import numpy as np # type: ignore
import pandas as pd
Expand All @@ -18,6 +19,11 @@ class AlternativeSchema:
a: int


class SchemaWithClassVar:
    """Test schema with one plain annotation and one ``ClassVar`` annotation."""

    # Plain annotated attribute.
    a: int
    # Class-level attribute; the ClassVar tests below rely on this annotation.
    b: ClassVar[str] = "abc"


dictionary = {"a": [1, 2, 3], "b": ["a", "b", "c"]}


Expand Down Expand Up @@ -111,6 +117,15 @@ def test_pickle():
assert (df == loaded).all().all()


def test_classvar_colum_not_allowed():
    """Building the DataSet from ``dictionary`` must raise ``TypeError``.

    ``dictionary`` has a "b" key, while ``b`` on ``SchemaWithClassVar`` is
    annotated as a ``ClassVar``.
    """
    with pytest.raises(TypeError):
        typed_dataset = DataSet[SchemaWithClassVar]
        typed_dataset(dictionary)


def test_classvar_colum_not_required():
    """Building the DataSet with only the "a" column must not raise."""
    columns_without_classvar = {"a": [1, 2, 3]}
    DataSet[SchemaWithClassVar](columns_without_classvar)


class A:
a: int

Expand Down

0 comments on commit 59ea0e1

Please sign in to comment.