Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect breaking changes to column names and data types in state:modified check #7216

Merged
merged 27 commits into from
Mar 28, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
b66beb6
Add state.modified.contract and tests
gshank Mar 22, 2023
14a8e30
Fix unit test
gshank Mar 23, 2023
c5b73ec
Update manifest/v9.json for artifact tests
gshank Mar 23, 2023
2f0d6b7
Update artifacts tests. Add reason to exception.
gshank Mar 23, 2023
d3a4cf4
first pass at changes before modifying tests
emmyoop Mar 21, 2023
edd9261
test updates
emmyoop Mar 22, 2023
b028400
add default
emmyoop Mar 24, 2023
7abbf67
update manifest
emmyoop Mar 24, 2023
4ee1a12
fix tests
emmyoop Mar 24, 2023
1cbab4c
changelog
emmyoop Mar 24, 2023
95d112c
fix unit tests
emmyoop Mar 24, 2023
8540fd3
rename strict -> enforced
emmyoop Mar 24, 2023
25be193
Move call to build_contract_checksum, concatenate reasons and update
gshank Mar 24, 2023
9311211
Merge branch 'main' into ct-2038-contract_state_modified
gshank Mar 24, 2023
d7ed116
Expand test a bit
gshank Mar 27, 2023
9498809
convert to object
emmyoop Mar 27, 2023
c711451
fix tests
emmyoop Mar 27, 2023
8943468
Update Under the Hood-20230217-105223.yaml
emmyoop Mar 27, 2023
20cd0f3
Update Under the Hood-20230217-105223.yaml
emmyoop Mar 27, 2023
520fc40
Rearrange same_contract
gshank Mar 27, 2023
4bf2766
remove stray breakpoints
emmyoop Mar 27, 2023
449aa48
move Contract definition to model_config
emmyoop Mar 27, 2023
8863089
Merge branch 'er/ct-2314-contract-dict' into ct-2038-contract_state_m…
gshank Mar 28, 2023
382f66f
Make changes to use new Contract object
gshank Mar 28, 2023
4f5a5ae
Merge branch 'main' into ct-2038-contract_state_modified
gshank Mar 28, 2023
4b2213a
Fix reference to self.contract in model_config.py
gshank Mar 28, 2023
46ad6d3
Merge branch 'main' into ct-2038-contract_state_modified
gshank Mar 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230323-133026.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Detect breaking changes to contracts in state:modified check
time: 2023-03-23T13:30:26.593717-04:00
custom:
Author: gshank
Issue: "6869"
44 changes: 43 additions & 1 deletion core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import time
from dataclasses import dataclass, field
from enum import Enum
import hashlib

from mashumaro.types import SerializableType
from typing import (
Expand Down Expand Up @@ -39,7 +40,7 @@
from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
from dbt.events.proto_types import NodeInfo
from dbt.events.functions import warn_or_error
from dbt.exceptions import ParsingError, InvalidAccessTypeError
from dbt.exceptions import ParsingError, InvalidAccessTypeError, ModelContractError
from dbt.events.types import (
SeedIncreased,
SeedExceedsLimitSamePath,
Expand Down Expand Up @@ -387,6 +388,12 @@ def same_config(self, old) -> bool:
old.unrendered_config,
)

def build_contract_checksum(self):
    """No-op default: this implementation computes and stores nothing."""
    pass

def same_contract(self, old) -> bool:
    """Default contract comparison: always reports that nothing changed.

    ``old`` is accepted so the signature matches other ``same_*`` checks,
    but it is not inspected here.
    """
    return True

def patch(self, patch: "ParsedNodePatch"):
"""Given a ParsedNodePatch, add the new information to the node."""
# explicitly pick out the parts to update so we don't inadvertently
Expand Down Expand Up @@ -428,6 +435,7 @@ def same_contents(self, old) -> bool:
and self.same_persisted_description(old)
and self.same_fqn(old)
and self.same_database_representation(old)
and self.same_contract(old)
and True
)

Expand Down Expand Up @@ -457,6 +465,7 @@ class CompiledNode(ParsedNode):
extra_ctes: List[InjectedCTE] = field(default_factory=list)
_pre_injected_sql: Optional[str] = None
contract: bool = False
contract_checksum: Optional[str] = None

@property
def empty(self):
Expand Down Expand Up @@ -497,6 +506,39 @@ def depends_on_nodes(self):
def depends_on_macros(self):
return self.depends_on.macros

def build_contract_checksum(self):
    """Compute the contract checksum and store it on ``self.contract_checksum``.

    The checksum is the SHA-256 hex digest of every column's name, data type
    and constraints, concatenated in column-name order so that the order in
    which columns are declared does not matter. Nothing is returned.

    When ``self.contract`` is not ``True`` the method does nothing, because
    the checksum would not be used. This needs to be executed after the
    contract config is set.
    """
    if self.contract is not True:
        return

    # columns is a str: ColumnInfo dictionary; sort so that order doesn't matter
    sorted_columns = sorted(self.columns.values(), key=lambda col: col.name)

    # data_type is coerced with str so that a column declared without a
    # data type (None) does not raise TypeError during concatenation. For
    # string data types the produced text, and so the checksum, is unchanged.
    contract_state = "".join(
        f"|{column.name}{column.data_type!s}{column.constraints!s}"
        for column in sorted_columns
    )
    data = contract_state.encode("utf-8")
    self.contract_checksum = hashlib.sha256(data).hexdigest()

def same_contract(self, old) -> bool:
    """Compare this node's contract with the contract on ``old``.

    Returns:
        ``True`` when nothing changed: the contract is disabled on both
        nodes, or the checksums of both nodes are equal. ``False`` when the
        contract was newly enabled, which is a change but not a breaking one.

    Raises:
        ModelContractError: on a breaking change, i.e. the contract was
            enabled on ``old`` and is disabled now, or the two contract
            checksums differ.
    """
    if old.contract is False and self.contract is False:
        # Not a change
        return True
    if old.contract is False and self.contract is True:
        # A change, but not a breaking change
        return False
    if old.contract is True and self.contract is False:
        # Breaking change: throw an error
        raise ModelContractError(node=self)
    if self.contract_checksum != old.contract_checksum:
        # Breaking change (the contracted columns differ): throw an error
        raise ModelContractError(node=self)
    # No change
    return True
MichelleArk marked this conversation as resolved.
Show resolved Hide resolved


# ====================================
# CompiledNode subclasses
Expand Down
18 changes: 18 additions & 0 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,24 @@ def _fix_dupe_msg(self, path_1: str, path_2: str, name: str, type_name: str) ->
)


class ModelContractError(DbtRuntimeError):
CODE = 10016
MESSAGE = "Contract Error"

def __init__(self, node=None):
super().__init__(self.message(), node)

@property
def type(self):
return "Contract"

def message(self):
return (
"There is a breaking change in the model contract; "
"you may need to create a new version. See: https://docs.getdbt.com/docs/collaborate/publish/model-versions"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This error message looks good to me 👍

Copy link
Contributor

@MichelleArk MichelleArk Mar 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the user is aware they're making a breaking change, and just needs some guidance regarding making a new version of the model - this error message looks good to me too.

That said, I am worried that this error message could be a little opaque for unintended breaking changes, especially as the set of changes that are considered breaking becomes more nuanced. For example, a user updates a contracted model with constraints from table to view in an attempt to optimize some spend, which raises a breaking change error because the existing constraints can't be validated. Just seeing "There is a breaking change in the model contract" could be confusing, and perhaps the user would actually choose to just revert that change and make it at a later point (bundle it with some other breaking changes). In that scenario, I'd (as an imaginary user) love to see an error message that looks more like:

There is a breaking change in the model contract: 
 * Updated materialization from `table` to `view` for model with constraints
 * Updated config from contract: true to contract: false
 * Column a removed
 * Column c updated data_type from string to int
 * ...
You may need to create a new version. See: https://docs.getdbt.com/docs/collaborate/publish/model-versions

This may not be a concern with our current definition of breaking changes (any change to column name/data_type => breaking change), but given that we're planning to extend this to be more nuanced soon in #7065, I'd leave some design space for it if the UX of this scenario seems important to prioritize (cc @jtcohen6).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently the change of a materialization is not a contract change, it's a config change. The ticket doesn't mention materialization -- do you want that to be part of the contract change too?

Creating that more user-friendly error message will probably be about twice as much work as everything else so far. If you want me to spend my time doing that, I can.

Copy link
Contributor

@MichelleArk MichelleArk Mar 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Materializations are not part of this issue, but they will be part of the work to extend detecting breaking changes in #7065 (which is still blocked by #7067). If we know we'll want more granular error messaging as part of that work, we could set up the foundations here or leave it to #7065.

I agree it's a good deal more work to get that though; we could break off improving the error messaging into a separate issue so it's refined and estimated appropriately.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a brief "reason" to the exception message. We can do more elaborate messaging once the other parts are in place.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(good conversation, no notes!)

)


class RecursionError(DbtRuntimeError):
pass

Expand Down
1 change: 1 addition & 0 deletions core/dbt/graph/selector_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,7 @@ def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[Uniqu
),
"modified.relation": self.check_modified_factory("same_database_representation"),
"modified.macros": self.check_modified_macros,
"modified.contract": self.check_modified_factory("same_contract"),
}
if selector in state_checks:
checker = state_checks[selector]
Expand Down
1 change: 1 addition & 0 deletions core/dbt/parser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ def update_parsed_node_config(
# compatibility with earlier node-only config.
if config_dict.get("contract", False):
parsed_node.contract = True
parsed_node.build_contract_checksum()
MichelleArk marked this conversation as resolved.
Show resolved Hide resolved

# unrendered_config is used to compare the original database/schema/alias
# values and to handle 'same_config' and 'same_contents' calls
Expand Down
52 changes: 52 additions & 0 deletions tests/functional/defer_state/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,58 @@
- name: name
"""

# Schema file in which table_model enables a contract and declares a
# data_type for each of its columns; view_model has no contract.
contract_schema_yml = """
version: 2
models:
  - name: view_model
    columns:
      - name: id
        tests:
          - unique:
              severity: error
          - not_null
      - name: name
  - name: table_model
    config:
      contract: true
    columns:
      - name: id
        data_type: integer
        tests:
          - unique:
              severity: error
          - not_null
      - name: name
        data_type: text
"""

# Same as the contract schema, except that table_model's "name" column
# gains a not_null constraint, which alters the contract.
modified_contract_schema_yml = """
version: 2
models:
  - name: view_model
    columns:
      - name: id
        tests:
          - unique:
              severity: error
          - not_null
      - name: name
  - name: table_model
    config:
      contract: true
    columns:
      - name: id
        data_type: integer
        tests:
          - unique:
              severity: error
          - not_null
      - name: name
        data_type: text
        constraints:
          - type: not_null
"""

exposures_yml = """
version: 2
exposures:
Expand Down
41 changes: 39 additions & 2 deletions tests/functional/defer_state/test_modified_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

import pytest

from dbt.tests.util import run_dbt, update_config_file, write_file
from dbt.tests.util import run_dbt, update_config_file, write_file, get_manifest

from dbt.exceptions import CompilationError
from dbt.exceptions import CompilationError, ModelContractError

from tests.functional.defer_state.fixtures import (
seed_csv,
Expand All @@ -18,6 +18,8 @@
exposures_yml,
macros_sql,
infinite_macros_sql,
contract_schema_yml,
modified_contract_schema_yml,
)


Expand Down Expand Up @@ -261,3 +263,38 @@ def test_changed_exposure(self, project):
results = run_dbt(["run", "--models", "+state:modified", "--state", "./state"])
assert len(results) == 1
assert results[0].node.name == "view_model"


class TestChangedContract(BaseModifiedState):
    """Checks how state:modified selection reacts to model contract changes."""

    def test_changed_contract(self, project):
        """Enable a contract, then alter it, then remove it.

        Enabling the contract must select table_model as modified; altering
        or removing an existing contract must raise ModelContractError.
        """
        self.run_and_save_state()

        # update contract for table_model
        write_file(contract_schema_yml, "models", "schema.yml")

        # This will find the table_model node modified both through a config change
        # and by a non-breaking change to contract: true
        results = run_dbt(["run", "--models", "state:modified", "--state", "./state"])
        assert len(results) == 1
        assert results[0].node.name == "table_model"
        manifest = get_manifest(project.project_root)
        model_unique_id = "model.test.table_model"
        model = manifest.nodes[model_unique_id]
        expected_unrendered_config = {"contract": True, "materialized": "table"}
        assert model.unrendered_config == expected_unrendered_config

        # Run it again with "state:modified.contract", still finds modified due to contract: true
        results = run_dbt(["run", "--models", "state:modified.contract", "--state", "./state"])
        assert len(results) == 1
        # save a new state
        self.copy_state()

        # This should raise because a column has added a constraint
        write_file(modified_contract_schema_yml, "models", "schema.yml")
        with pytest.raises(ModelContractError):
            run_dbt(["run", "--models", "state:modified.contract", "--state", "./state"])

        # Go back to schema file without contract. Should raise an error.
        write_file(schema_yml, "models", "schema.yml")
        with pytest.raises(ModelContractError):
            run_dbt(["run", "--models", "state:modified.contract", "--state", "./state"])