Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect breaking changes to column names and data types in state:modified check #7216

Merged
merged 27 commits into from
Mar 28, 2023
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
b66beb6
Add state.modified.contract and tests
gshank Mar 22, 2023
14a8e30
Fix unit test
gshank Mar 23, 2023
c5b73ec
Update manifest/v9.json for artifact tests
gshank Mar 23, 2023
2f0d6b7
Update artifacts tests. Add reason to exception.
gshank Mar 23, 2023
d3a4cf4
first pass at changes before modifying tests
emmyoop Mar 21, 2023
edd9261
test updates
emmyoop Mar 22, 2023
b028400
add default
emmyoop Mar 24, 2023
7abbf67
update manifest
emmyoop Mar 24, 2023
4ee1a12
fix tests
emmyoop Mar 24, 2023
1cbab4c
changelog
emmyoop Mar 24, 2023
95d112c
fix unit tests
emmyoop Mar 24, 2023
8540fd3
rename strict -> enforced
emmyoop Mar 24, 2023
25be193
Move call to build_contract_checksum, concatenate reasons and update
gshank Mar 24, 2023
9311211
Merge branch 'main' into ct-2038-contract_state_modified
gshank Mar 24, 2023
d7ed116
Expand test a bit
gshank Mar 27, 2023
9498809
convert to object
emmyoop Mar 27, 2023
c711451
fix tests
emmyoop Mar 27, 2023
8943468
Update Under the Hood-20230217-105223.yaml
emmyoop Mar 27, 2023
20cd0f3
Update Under the Hood-20230217-105223.yaml
emmyoop Mar 27, 2023
520fc40
Rearrange same_contract
gshank Mar 27, 2023
4bf2766
remove stray breakpoints
emmyoop Mar 27, 2023
449aa48
move Contract definition to model_config
emmyoop Mar 27, 2023
8863089
Merge branch 'er/ct-2314-contract-dict' into ct-2038-contract_state_m…
gshank Mar 28, 2023
382f66f
Make changes to use new Contract object
gshank Mar 28, 2023
4f5a5ae
Merge branch 'main' into ct-2038-contract_state_modified
gshank Mar 28, 2023
4b2213a
Fix reference to self.contract in model_config.py
gshank Mar 28, 2023
46ad6d3
Merge branch 'main' into ct-2038-contract_state_modified
gshank Mar 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions .changes/1.5.0/Under the Hood-20230217-105223.yaml

This file was deleted.

6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230323-133026.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Detect breaking changes to contracts in state:modified check
time: 2023-03-23T13:30:26.593717-04:00
custom:
Author: gshank
Issue: "6869"
6 changes: 6 additions & 0 deletions .changes/unreleased/Under the Hood-20230217-105223.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Under the Hood
body: Treat contract config as a python object
time: 2023-02-17T10:52:23.212474-05:00
custom:
Author: gshank emmyoop
Issue: 6748 7184
12 changes: 10 additions & 2 deletions core/dbt/contracts/graph/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,11 @@ class Severity(str):
register_pattern(Severity, insensitive_patterns("warn", "error"))


@dataclass
class ContractConfig(dbtClassMixin, Replaceable):
enforced: bool = False


@dataclass
class Hook(dbtClassMixin, Replaceable):
sql: str
Expand Down Expand Up @@ -286,7 +291,7 @@ def same_contents(cls, unrendered: Dict[str, Any], other: Dict[str, Any]) -> boo
# 'meta' moved here from node
mergebehavior = {
"append": ["pre-hook", "pre_hook", "post-hook", "post_hook", "tags"],
"update": ["quoting", "column_types", "meta", "docs"],
"update": ["quoting", "column_types", "meta", "docs", "contract"],
"dict_key_append": ["grants"],
}

Expand Down Expand Up @@ -451,7 +456,10 @@ class NodeConfig(NodeAndTestConfig):
default_factory=Docs,
metadata=MergeBehavior.Update.meta(),
)
contract: bool = False
contract: ContractConfig = field(
default_factory=ContractConfig,
metadata=MergeBehavior.Update.meta(),
)

# we validate that node_color has a suitable value to prevent dbt-docs from crashing
def __post_init__(self):
Expand Down
78 changes: 66 additions & 12 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import time
from dataclasses import dataclass, field
from enum import Enum
import hashlib

from mashumaro.types import SerializableType
from typing import (
Expand All @@ -20,25 +21,25 @@
from dbt.clients.system import write_file
from dbt.contracts.files import FileHash
from dbt.contracts.graph.unparsed import (
Quoting,
Docs,
FreshnessThreshold,
ExposureType,
ExternalTable,
FreshnessThreshold,
HasYamlMetadata,
MacroArgument,
UnparsedSourceDefinition,
UnparsedSourceTableDefinition,
UnparsedColumn,
TestDef,
Owner,
ExposureType,
MaturityType,
MetricFilter,
MetricTime,
Owner,
Quoting,
TestDef,
UnparsedSourceDefinition,
UnparsedSourceTableDefinition,
UnparsedColumn,
)
from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
from dbt.events.functions import warn_or_error
from dbt.exceptions import ParsingError, InvalidAccessTypeError
from dbt.exceptions import ParsingError, InvalidAccessTypeError, ModelContractError
from dbt.events.types import (
SeedIncreased,
SeedExceedsLimitSamePath,
Expand All @@ -50,7 +51,6 @@
from dbt.flags import get_flags
from dbt.node_types import ModelLanguage, NodeType, AccessType


from .model_config import (
NodeConfig,
SeedConfig,
Expand Down Expand Up @@ -184,6 +184,12 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin, Replaceable
_extra: Dict[str, Any] = field(default_factory=dict)


@dataclass
class Contract(dbtClassMixin, Replaceable):
enforced: bool = False
checksum: Optional[str] = None


# Metrics, exposures,
@dataclass
class HasRelationMetadata(dbtClassMixin, Replaceable):
Expand Down Expand Up @@ -382,6 +388,13 @@ def same_config(self, old) -> bool:
old.unrendered_config,
)

def build_contract_checksum(self):
pass

def same_contract(self, old) -> bool:
# This would only apply to seeds
return True

def patch(self, patch: "ParsedNodePatch"):
"""Given a ParsedNodePatch, add the new information to the node."""
# explicitly pick out the parts to update so we don't inadvertently
Expand Down Expand Up @@ -423,6 +436,7 @@ def same_contents(self, old) -> bool:
and self.same_persisted_description(old)
and self.same_fqn(old)
and self.same_database_representation(old)
and self.same_contract(old)
and True
)

Expand Down Expand Up @@ -451,7 +465,7 @@ class CompiledNode(ParsedNode):
extra_ctes_injected: bool = False
extra_ctes: List[InjectedCTE] = field(default_factory=list)
_pre_injected_sql: Optional[str] = None
contract: bool = False
contract: Contract = field(default_factory=Contract)

@property
def empty(self):
Expand Down Expand Up @@ -492,6 +506,46 @@ def depends_on_nodes(self):
def depends_on_macros(self):
return self.depends_on.macros

def build_contract_checksum(self):
# We don't need to construct the checksum if the model does not
# have contract enforced, because it won't be used.
# This needs to be executed after contract config is set
if self.contract.enforced is True:
contract_state = ""
# We need to sort the columns so that order doesn't matter
# columns is a str: ColumnInfo dictionary
sorted_columns = sorted(self.columns.values(), key=lambda col: col.name)
for column in sorted_columns:
contract_state += f"|{column.name}"
contract_state += str(column.data_type)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense to leave out constraints for the scope of this issue 👍

We should revisit whether constraints should go in the checksum when we go to implement #7065, where:

  • breaking changes to constraints should raise a ModelContractError from same_contract
  • non-breaking changes to constraints (e.g. adding a constraint, removing an unenforced constraint) should return False from same_contract.

Determining the nature of a change to a constraint will need additional logic beyond comparing checksums, and adding constraints to the checksum will make understanding changes to column data_type and names a bit trickier. I could see the checksum being a useful to determine, crudely, whether there was a change at all, and needing to actually inspect the columns/constraints more closely to determine the type of change - breaking vs non-breaking.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could either checks columns individually or have two checksums in the checksum field, one for name/data_type and one for constraints.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the benefit of putting these methods on CompiledNode versus carrying the columns on Contract and overriding __eq__() on Contract (in place of same_contract())?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are not prepared to move those fields at this time, since it would be a breaking change for packages which use those fields. In addition, not all of the attributes of a column count for breaking changes, so a simple equal doesn't work.

data = contract_state.encode("utf-8")
self.contract.checksum = hashlib.new("sha256", data).hexdigest()

def same_contract(self, old) -> bool:
if old.contract.enforced is False and self.contract.enforced is False:
# Not a change
return True
if old.contract.enforced is False and self.contract.enforced is True:
# A change, but not a breaking change
return False

breaking_change_reasons = []
if old.contract.enforced is True and self.contract.enforced is False:
# Breaking change: throw an error
# Note: we don't have contract.checksum for current node, so build
self.build_contract_checksum()
MichelleArk marked this conversation as resolved.
Show resolved Hide resolved
breaking_change_reasons.append("contract has been disabled")

if self.contract.checksum != old.contract.checksum:
# Breaking change, throw error
breaking_change_reasons.append("column definitions have changed")

if breaking_change_reasons:
raise (ModelContractError(reasons=" and ".join(breaking_change_reasons), node=self))
else:
# no breaking changes
return True


# ====================================
# CompiledNode subclasses
Expand Down Expand Up @@ -914,7 +968,7 @@ def same_contents(self, old: Optional["SourceDefinition"]) -> bool:
if old is None:
return True

# config changes are changes (because the only config is "enabled", and
# config changes are changes (because the only config is "enforced", and
# enabling a source is a change!)
# changing the database/schema/identifier is a change
# messing around with external stuff is a change (uh, right?)
Expand Down
19 changes: 19 additions & 0 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,25 @@ def _fix_dupe_msg(self, path_1: str, path_2: str, name: str, type_name: str) ->
)


class ModelContractError(DbtRuntimeError):
CODE = 10016
MESSAGE = "Contract Error"

def __init__(self, reasons, node=None):
self.reasons = reasons
super().__init__(self.message(), node)

@property
def type(self):
return "Contract"

def message(self):
return (
f"There is a breaking change in the model contract because {self.reasons}; "
"you may need to create a new version. See: https://docs.getdbt.com/docs/collaborate/publish/model-versions"
)


class RecursionError(DbtRuntimeError):
pass

Expand Down
1 change: 1 addition & 0 deletions core/dbt/graph/selector_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,7 @@ def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[Uniqu
),
"modified.relation": self.check_modified_factory("same_database_representation"),
"modified.macros": self.check_modified_macros,
"modified.contract": self.check_modified_factory("same_contract"),
}
if selector in state_checks:
checker = state_checks[selector]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@

create {% if temporary: -%}temporary{%- endif %} table
{{ relation.include(database=(not temporary), schema=(not temporary)) }}
{% if config.get('contract', False) %}
{% set contract_config = config.get('contract') %}
{% if contract_config.enforced %}
{{ get_assert_columns_equivalent(sql) }}
{{ get_columns_spec_ddl() }}
{%- set sql = get_select_subquery(sql) %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

{{ sql_header if sql_header is not none }}
create view {{ relation }}
{%- if config.get('contract', False) %}
{% set contract_config = config.get('contract') %}
{% if contract_config.enforced %}
{{ get_assert_columns_equivalent(sql) }}
{%- endif %}
as (
Expand Down
13 changes: 6 additions & 7 deletions core/dbt/parser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from dbt.config import Project, RuntimeConfig
from dbt.context.context_config import ContextConfig
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.nodes import ManifestNode, BaseNode
from dbt.contracts.graph.unparsed import UnparsedNode, Docs
from dbt.contracts.graph.nodes import Contract, BaseNode, ManifestNode
from dbt.contracts.graph.unparsed import Docs, UnparsedNode
from dbt.exceptions import DbtInternalError, ConfigUpdateError, DictParseError
from dbt import hooks
from dbt.node_types import NodeType, ModelLanguage
Expand Down Expand Up @@ -297,7 +297,7 @@ def update_parsed_node_config(
# If we have docs in the config, merge with the node level, for backwards
# compatibility with earlier node-only config.
if "docs" in config_dict and config_dict["docs"]:
# we set show at the value of the config if it is set, otherwize, inherit the value
# we set show at the value of the config if it is set, otherwise, inherit the value
docs_show = (
config_dict["docs"]["show"]
if "show" in config_dict["docs"]
Expand All @@ -310,10 +310,9 @@ def update_parsed_node_config(
else:
parsed_node.docs = Docs(show=docs_show)

# If we have "contract" in the config, copy to node level, for backwards
# compatibility with earlier node-only config.
if config_dict.get("contract", False):
parsed_node.contract = True
# If we have contract in the config, copy to node level
if "contract" in config_dict and config_dict["contract"]:
parsed_node.contract = Contract(enforced=config_dict["contract"]["enforced"])

# unrendered_config is used to compare the original database/schema/alias
# values and to handle 'same_config' and 'same_contents' calls
Expand Down
8 changes: 7 additions & 1 deletion core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,7 @@ def get_unparsed_target(self) -> Iterable[NonSourceTarget]:
self.normalize_meta_attribute(data, path)
self.normalize_docs_attribute(data, path)
self.normalize_group_attribute(data, path)
self.normalize_contract_attribute(data, path)
node = self._target_type().from_dict(data)
except (ValidationError, JSONValidationError) as exc:
raise YamlParseDictError(path, self.key, data, exc)
Expand Down Expand Up @@ -828,6 +829,9 @@ def normalize_docs_attribute(self, data, path):
def normalize_group_attribute(self, data, path):
return self.normalize_attribute(data, path, "group")

def normalize_contract_attribute(self, data, path):
return self.normalize_attribute(data, path, "contract")

def patch_node_config(self, node, patch):
# Get the ContextConfig that's used in calculating the config
# This must match the model resource_type that's being patched
Expand Down Expand Up @@ -937,10 +941,12 @@ def parse_patch(self, block: TargetBlock[NodeTarget], refs: ParserRef) -> None:

node.patch(patch)
self.validate_constraints(node)
node.build_contract_checksum()

def validate_constraints(self, patched_node):
error_messages = []
if patched_node.resource_type == "model" and patched_node.config.contract is True:
contract_config = patched_node.config.get("contract")
if patched_node.resource_type == "model" and contract_config.enforced is True:
validators = [
self.constraints_schema_validator(patched_node),
self.constraints_materialization_validator(patched_node),
Expand Down
3 changes: 2 additions & 1 deletion plugins/postgres/dbt/include/postgres/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
{%- elif unlogged -%}
unlogged
{%- endif %} table {{ relation }}
{% if config.get('contract', False) %}
{% set contract_config = config.get('contract') %}
{% if contract_config.enforced %}
{{ get_assert_columns_equivalent(sql) }}
{{ get_columns_spec_ddl() }} ;
insert into {{ relation }} {{ get_column_names() }}
Expand Down
Loading