-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
df0a2d0
commit c6c3cc4
Showing
9 changed files
with
437 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
## Notes | ||
|
||
In order to enable auto-complete for metrics in VS Code or PyCharm, you need to have a mypy plugin installed. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .column_values.between import ColumnValuesBetween |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from typing import Optional | ||
|
||
from great_expectations.core.types import Comparable | ||
from great_expectations.metrics.domain import ColumnValues | ||
from great_expectations.metrics.metric import Metric | ||
|
||
|
||
class ColumnValuesBetween(Metric, ColumnValues):
    """Metric over the ``ColumnValues`` domain, parameterized by optional bounds.

    Following the naming rule in ``Metric._get_metric_name``, the registry name
    derived for this class is ``column_values.between``.

    NOTE(review): the computation itself is not defined here; presumably the metric
    flags, per row, whether the column value lies between ``min_value`` and
    ``max_value`` -- confirm against the metric provider implementation.
    """

    # Lower bound; defaults to None (presumably "no lower bound" -- confirm).
    min_value: Optional[Comparable] = None
    # Upper bound; defaults to None (presumably "no upper bound" -- confirm).
    max_value: Optional[Comparable] = None
    # Presumably makes the lower comparison exclusive when True -- confirm.
    strict_min: bool = False
    # Presumably makes the upper comparison exclusive when True -- confirm.
    strict_max: bool = False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
from typing import Annotated, Optional | ||
|
||
from great_expectations.compatibility.pydantic import BaseModel, Field, StrictStr | ||
|
||
# Strict string type constrained to a minimum length of 1, i.e. "" is rejected.
NonEmptyString = Annotated[StrictStr, Field(min_length=1)]
|
||
|
||
class AbstractClassInstantiationError(TypeError):
    """Raised when a class that is meant to be abstract is instantiated directly.

    Args:
        class_name: Name of the abstract class, interpolated into the message.
    """

    def __init__(self, class_name: str) -> None:
        message = f"Cannot instantiate abstract class `{class_name}`."
        super().__init__(message)
|
||
|
||
class Domain(BaseModel):
    """The abstract base class for defining all types of domains over which metrics are computed.

    ``Domain`` itself cannot be instantiated; only its subclasses can.

    Raises:
        AbstractClassInstantiationError: If ``Domain`` is instantiated directly.
    """

    batch_id: NonEmptyString

    def __new__(cls, *args, **kwargs):
        # Only the base class itself is blocked; subclasses construct normally.
        if cls is not Domain:
            return super().__new__(cls)
        raise AbstractClassInstantiationError(cls.__name__)
|
||
|
||
class Values(Domain):
    """The abstract base class for metric domain types that compute row-level calculations.

    ``Values`` itself cannot be instantiated; only its subclasses can.

    Raises:
        AbstractClassInstantiationError: If ``Values`` is instantiated directly.
    """

    table: NonEmptyString
    row_condition: Optional[StrictStr] = None

    def __new__(cls, *args, **kwargs):
        # Only this intermediate base is blocked; concrete subclasses construct normally.
        if cls is not Values:
            return super().__new__(cls)
        raise AbstractClassInstantiationError(cls.__name__)
|
||
|
||
class ColumnValues(Values):
    """A domain type for metrics that compute row-level calculations on a single column.

    The ColumnValues domain type is used to define metrics that evaluate conditions or compute
    values for each row in a single column. This class is intended to be used as a mixin
    with the Metric class when defining a new Metric.

    Attributes:
        batch_id (str): Unique identifier for the batch being processed.
        table (str): Name of the table containing the column.
        column (str): Name of the column to compute metrics on.
        row_condition (Optional[str]): A condition that can be used to filter rows.
            See: https://docs.greatexpectations.io/docs/core/customize_expectations/expectation_conditions/#create-an-expectation-condition

    Examples:
        A metric with a ColumnValues domain for column nullity values computed on each row:

        >>> class Null(Metric, ColumnValues):
        ...     ...

    See Also:
        Metric: The abstract base class for defining all metrics
    """

    # Required; must be a non-empty string (see NonEmptyString).
    column: NonEmptyString
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
import re | ||
from typing import ClassVar, Final | ||
|
||
from typing_extensions import dataclass_transform | ||
|
||
from great_expectations.compatibility.pydantic import BaseModel, ModelMetaclass, root_validator | ||
from great_expectations.metrics.domain import AbstractClassInstantiationError, Domain | ||
from great_expectations.validator.metric_configuration import ( | ||
MetricConfiguration, | ||
MetricConfigurationID, | ||
) | ||
|
||
# Number of mixin bases a Metric subclass declares in addition to Metric itself;
# MetaMetric requires len(bases) == ALLOWABLE_METRIC_MIXINS + 1.
ALLOWABLE_METRIC_MIXINS: Final[int] = 1
|
||
|
||
class MixinTypeError(TypeError):
    """Raised when a class does not declare exactly one mixin of the required kind.

    Args:
        class_name: Name of the offending class.
        mixin_superclass_name: Name of the superclass the mixin must derive from.
    """

    def __init__(self, class_name: str, mixin_superclass_name: str) -> None:
        message = f"`{class_name}` must use a single `{mixin_superclass_name}` subclass mixin."
        super().__init__(message)
|
||
|
||
@dataclass_transform()
class MetaMetric(ModelMetaclass):
    """Metaclass that enforces the Metric/Domain mixin convention at class-creation time.

    Every class created through this metaclass, other than the one named ``Metric``,
    must list exactly ``ALLOWABLE_METRIC_MIXINS + 1`` bases, at least one of which is
    a ``Domain`` subclass.

    Raises:
        MixinTypeError: If the base list has the wrong length or contains no
            ``Domain`` subclass.
    """

    def __new__(cls, name, bases, attrs, **kwargs):
        # ensure a single Domain mixin is defined
        if name != "Metric" and (
            len(bases) != ALLOWABLE_METRIC_MIXINS + 1
            or not any(issubclass(base_type, Domain) for base_type in bases)
        ):
            raise MixinTypeError(name, "Domain")
        # Forward class keyword arguments to the parent metaclass rather than
        # failing with a TypeError on an unexpected keyword.
        return super().__new__(cls, name, bases, attrs, **kwargs)
|
||
|
||
class Metric(BaseModel, metaclass=MetaMetric):
    """The abstract base class for defining all metrics.

    A Metric represents a measurable property that can be computed over a specific domain
    of data (e.g., a column, table, or column pair). All concrete metric implementations
    must inherit from this class and specify their domain type as a mixin.

    Examples:
        A metric for column nullity values computed on each row:

        >>> class Null(Metric, ColumnValues):
        ...     ...

        A metric for a single table row count value:

        >>> class RowCount(Metric, Table):
        ...     ...

    Notes:
        - The Metric class cannot be instantiated directly - it must be subclassed.
        - Subclasses must specify a single Domain type as a mixin.
        - Once Metrics are instantiated, they are immutable.

    See Also:
        Domain: The base class for all domain types
        MetricConfiguration: Configuration class for metric computation
    """

    name: ClassVar[str]
    config: MetricConfiguration

    class Config:
        arbitrary_types_allowed = True
        frozen = True

    def __new__(cls, *args, **kwargs):
        """Block direct instantiation of ``Metric`` and set ``name`` on the concrete class."""
        if cls is Metric:
            raise AbstractClassInstantiationError(cls.__name__)
        # The registry name is (re)assigned on the class each time an instance is created.
        cls.name = cls._get_metric_name()
        return super().__new__(cls)

    @root_validator(pre=True)
    @classmethod
    def _set_config(cls, values) -> dict:
        """Fill in ``config`` from the other input values when it is missing or None."""
        if values.get("config") is None:
            values["config"] = cls._to_config(values)
        return values

    @property
    def id(self) -> MetricConfigurationID:
        """The identifier of this metric's ``MetricConfiguration``."""
        return self.config.id

    @staticmethod
    def _pascal_to_snake(class_name: str) -> str:
        """Convert a PascalCase class name to snake_case.

        Example: ``APIFunctionMetric`` -> ``api_function_metric``.
        """
        # Split a run of capitals from a following Capital+lowercase pair:
        # APIFunctionMetric -> API_FunctionMetric
        acronyms_split = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", class_name)
        # Split a lowercase letter/digit from a following capital:
        # API_FunctionMetric -> API_Function_Metric
        words_split = re.sub(r"([a-z\d])([A-Z])", r"\1_\2", acronyms_split)
        # API_Function_Metric -> api_function_metric
        return words_split.lower()

    @classmethod
    def _get_metric_name(cls) -> str:
        """The name of the metric as it exists in the registry.

        Built as ``<domain_snake_case>.<metric_snake_case without the domain part>``
        using the first direct base that is a ``Domain`` subclass.

        Raises:
            MixinTypeError: If no direct base is a ``Domain`` subclass.
        """
        domain_type = next(
            (base_type for base_type in cls.__bases__ if issubclass(base_type, Domain)),
            None,
        )
        if domain_type is None:
            # this should never be reached
            # that a Domain exists in __bases__ should have been confirmed in MetaMetric.__new__
            raise MixinTypeError(cls.__name__, "Domain")

        domain_snake = Metric._pascal_to_snake(str(domain_type.__name__))
        metric_snake = Metric._pascal_to_snake(str(cls.__name__))
        # the convention is that the metric class name includes the domain class name
        # but the metric names don't repeat the domain name, so we remove it
        metric_part = metric_snake.replace(domain_snake, "").strip("_")
        return f"{domain_snake}.{metric_part}"

    @classmethod
    def _to_config(cls, model_values: dict) -> MetricConfiguration:
        """Returns a MetricConfiguration instance for this Metric.

        Fields declared on a ``Domain`` base become ``metric_domain_kwargs``; fields
        declared on neither that base nor ``Metric`` become ``metric_value_kwargs``.
        Each value is read from ``model_values``, falling back to the field default.
        """
        metric_fields = Metric.__fields__
        domain_kwargs: dict = {}
        value_kwargs: dict = {}

        for base_type in cls.__bases__:
            if not issubclass(base_type, Domain):
                continue
            domain_fields = base_type.__fields__
            domain_kwargs.update(
                (field_name, model_values.get(field_name, field_info.default))
                for field_name, field_info in domain_fields.items()
            )
            value_kwargs.update(
                (field_name, model_values.get(field_name, field_info.default))
                for field_name, field_info in cls.__fields__.items()
                if field_name not in domain_fields and field_name not in metric_fields
            )

        return MetricConfiguration(
            metric_name=cls.name,
            metric_domain_kwargs=domain_kwargs,
            metric_value_kwargs=value_kwargs,
        )
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import pytest | ||
|
||
from great_expectations.compatibility.pydantic import ValidationError, errors | ||
from great_expectations.metrics.domain import ( | ||
AbstractClassInstantiationError, | ||
ColumnValues, | ||
Domain, | ||
Values, | ||
) | ||
|
||
# Shared values passed as the batch_id / table / column arguments in the tests below.
BATCH_ID = "my_data_source-my_data_asset-year_2025"
TABLE = "my_table"
COLUMN = "my_column"
|
||
|
||
class TestAbstractClasses:
    """The abstract domain bases must refuse direct instantiation."""

    @pytest.mark.unit
    def test_domain_instantiation_raises(self):
        """``Domain`` cannot be constructed directly."""
        domain_kwargs = {"batch_id": BATCH_ID}
        with pytest.raises(AbstractClassInstantiationError):
            Domain(**domain_kwargs)

    @pytest.mark.unit
    def test_values_instantiation_raises(self):
        """``Values`` cannot be constructed directly."""
        values_kwargs = {"batch_id": BATCH_ID, "table": TABLE}
        with pytest.raises(AbstractClassInstantiationError):
            Values(**values_kwargs)
|
||
|
||
class TestColumnMap:
    """Construction and validation behavior of the ``ColumnValues`` domain."""

    @pytest.mark.unit
    def test_column_map_instantiation_success(self):
        """Non-empty values for all three fields construct without error."""
        ColumnValues(batch_id=BATCH_ID, table=TABLE, column=COLUMN)

    @pytest.mark.unit
    @pytest.mark.parametrize(
        "kwargs",
        [
            {"batch_id": "", "table": TABLE, "column": COLUMN},
            {"batch_id": BATCH_ID, "table": "", "column": COLUMN},
            {"batch_id": BATCH_ID, "table": TABLE, "column": ""},
        ],
    )
    def test_column_map_arguments_empty_string_raises(self, kwargs: dict):
        """An empty string in any one field is rejected with a min-length error."""
        with pytest.raises(ValidationError) as e:
            ColumnValues(**kwargs)
        # Inspect the captured exception only after the ``with`` block has exited:
        # statements placed after the raising call inside the block would never run.
        all_errors = e.value.raw_errors
        assert any(
            isinstance(getattr(error, "exc", None), errors.AnyStrMinLengthError)
            for error in all_errors
        )
Oops, something went wrong.