Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

535 implement postgres style index creation #573

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ target/
*.swp
*.swo

# vscode
.vscode/

# Mypy cache
.mypy_cache/

Expand Down
8 changes: 8 additions & 0 deletions dbt/adapters/sqlserver/relation_configs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from dbt.adapters.sqlserver.relation_configs.index import (
SQLServerIndexConfig,
SQLServerIndexConfigChange,
SQLServerIndexType,
)
from dbt.adapters.sqlserver.relation_configs.policies import (
MAX_CHARACTERS_IN_IDENTIFIER,
SQLServerIncludePolicy,
Expand All @@ -10,4 +15,7 @@
"SQLServerIncludePolicy",
"SQLServerQuotePolicy",
"SQLServerRelationType",
"SQLServerIndexType",
"SQLServerIndexConfig",
"SQLServerIndexConfigChange",
]
241 changes: 241 additions & 0 deletions dbt/adapters/sqlserver/relation_configs/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import FrozenSet, Optional, Set, Tuple

import agate
from dbt.adapters.exceptions import IndexConfigError, IndexConfigNotDictError
from dbt.adapters.relation_configs import (
RelationConfigBase,
RelationConfigChange,
RelationConfigChangeAction,
RelationConfigValidationMixin,
RelationConfigValidationRule,
)
from dbt_common.dataclass_schema import StrEnum, ValidationError, dbtClassMixin
from dbt_common.exceptions import DbtRuntimeError
from dbt_common.utils import encoding as dbt_encoding


# Handle datetime now for testing.
def datetime_now(tz: Optional[timezone] = timezone.utc) -> datetime:
return datetime.now(tz)


# ALTERED FROM:
# github.com/dbt-labs/dbt-postgres/blob/main/dbt/adapters/postgres/relation_configs/index.py
class SQLServerIndexType(StrEnum):
# btree = "btree" #All SQL Server common indexes are B-tree indexes
# hash = "hash" #A hash index can exist only on a memory-optimized table.
# TODO Implement memory optimized table materialization.
clustered = "clustered" # Cant't have included columns
nonclustered = "nonclustered"
columnstore = "columnstore" # Cant't have included columns or unique config

@classmethod
def default(cls) -> "SQLServerIndexType":
return cls("nonclustered")

@classmethod
def valid_types(cls):
return tuple(cls)


@dataclass(frozen=True, eq=True, unsafe_hash=True)
class SQLServerIndexConfig(RelationConfigBase, RelationConfigValidationMixin, dbtClassMixin):
"""
This config follows the specs found here:

https://learn.microsoft.com/en-us/sql/t-sql/statements/create-index-transact-sql

The following parameters are configurable by dbt:
- name: the name of the index in the database, isn't predictable since we apply a timestamp
- unique: checks for duplicate values when the index is created and on data updates
- type: the index type method to be used
- columns: the columns names in the index
- included_columns: the extra included columns names in the index

"""

name: str = field(default="", hash=False, compare=False)
columns: Tuple[str, ...] = field(
default_factory=tuple, hash=True
) # Keeping order is important
unique: bool = field(
default=False, hash=True
) # Uniqueness can be a property of both clustered and nonclustered indexes.
type: SQLServerIndexType = field(default=SQLServerIndexType.default(), hash=True)
included_columns: FrozenSet[str] = field(
default_factory=frozenset, hash=True
) # Keeping order is not important

@property
def validation_rules(self) -> Set[RelationConfigValidationRule]:
return {
RelationConfigValidationRule(
validation_check=True if self.columns else False,
validation_error=DbtRuntimeError("'columns' is a required property"),
),
RelationConfigValidationRule(
validation_check=(
True
if not self.included_columns
else self.type == SQLServerIndexType.nonclustered
),
validation_error=DbtRuntimeError(
"Non-clustered indexes are the only index types that can include extra columns"
),
),
RelationConfigValidationRule(
validation_check=(
True
if not self.unique
else self.type
in (SQLServerIndexType.clustered, SQLServerIndexType.nonclustered)
),
validation_error=DbtRuntimeError(
"Clustered and nonclustered indexes are the only types that can be unique"
),
),
RelationConfigValidationRule(
validation_check=True if self.type in SQLServerIndexType.valid_types() else False,
validation_error=DbtRuntimeError(
f"Invalid index type: {self.type}, valid types:"
+ f"{SQLServerIndexType.valid_types()}"
),
),
}

@classmethod
def from_dict(cls, config_dict) -> "SQLServerIndexConfig":
kwargs_dict = {
"name": config_dict.get("name"),
"columns": tuple(column for column in config_dict.get("columns", tuple())),
"unique": config_dict.get("unique"),
"type": config_dict.get("type"),
"included_columns": frozenset(
column for column in config_dict.get("included_columns", set())
),
}
index: "SQLServerIndexConfig" = super().from_dict(kwargs_dict) # type: ignore
return index

@classmethod
def parse_model_node(cls, model_node_entry: dict) -> dict:
config_dict = {
"columns": tuple(model_node_entry.get("columns", tuple())),
"unique": model_node_entry.get("unique"),
"type": model_node_entry.get("type"),
"included_columns": frozenset(model_node_entry.get("included_columns", set())),
}
return config_dict

@classmethod
def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict:
config_dict = {
"name": relation_results_entry.get("name"),
"columns": tuple(relation_results_entry.get("columns", "").split(",")),
"unique": relation_results_entry.get("unique"),
"type": relation_results_entry.get("type"),
"included_columns": set(relation_results_entry.get("included_columns", "").split(",")),
}
return config_dict

@property
def as_node_config(self) -> dict:
"""
Returns: a dictionary that can be passed into `get_create_index_sql()`
"""
node_config = {
"columns": tuple(self.columns),
"unique": self.unique,
"type": self.type.value,
"included_columns": frozenset(self.included_columns),
}
return node_config

def render(self, relation):
# We append the current timestamp to the index name because otherwise
# the index will only be created on every other run. See
# https://github.com/dbt-labs/dbt-core/issues/1945#issuecomment-576714925
# for an explanation.

now = datetime_now(tz=timezone.utc).isoformat()
inputs = self.columns + tuple((relation.render(), str(self.unique), str(self.type), now))
string = "_".join(inputs)
print(f"Actual string before MD5: {string}")
return dbt_encoding.md5(string)

@classmethod
def parse(cls, raw_index) -> Optional["SQLServerIndexConfig"]:
if raw_index is None:
return None
try:
if not isinstance(raw_index, dict):
raise IndexConfigNotDictError(raw_index)
cls.validate(raw_index)
return cls.from_dict(raw_index)
except ValidationError as exc:
raise IndexConfigError(exc)
except TypeError:
raise IndexConfigNotDictError(raw_index)


@dataclass(frozen=True, eq=True, unsafe_hash=True)
class SQLServerIndexConfigChange(RelationConfigChange, RelationConfigValidationMixin):
"""
Example of an index change:
{
"action": "create",
"context": {
"name": "", # we don't know the name since it gets created as a hash at runtime
"columns": ["column_1", "column_3"],
"type": "clustered",
"unique": True
}
},
{
"action": "drop",
"context": {
"name": "index_abc", # we only need this to drop, but we need the rest to compare
"columns": ["column_1"],
"type": "nonclustered",
"unique": True
}
}
"""

# TODO: Implement the change actions on the adapter
context: SQLServerIndexConfig

@property
def requires_full_refresh(self) -> bool:
return False

@property
def validation_rules(self) -> Set[RelationConfigValidationRule]:
return {
RelationConfigValidationRule(
validation_check=self.action
in {RelationConfigChangeAction.create, RelationConfigChangeAction.drop},
validation_error=DbtRuntimeError(
"Invalid operation, only `drop` and `create` are supported for indexes."
),
),
RelationConfigValidationRule(
validation_check=not (
self.action == RelationConfigChangeAction.drop and self.context.name is None
),
validation_error=DbtRuntimeError(
"Invalid operation, attempting to drop an index with no name."
),
),
RelationConfigValidationRule(
validation_check=not (
self.action == RelationConfigChangeAction.create
and self.context.columns == set()
),
validation_error=DbtRuntimeError(
"Invalid operations, attempting to create an index with no columns."
),
),
}
11 changes: 9 additions & 2 deletions dbt/adapters/sqlserver/sqlserver_adapter.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from typing import Optional
from typing import Any, Optional

import dbt.exceptions
from dbt.adapters.base.impl import ConstraintSupport
from dbt.adapters.base import ConstraintSupport, available
from dbt.adapters.fabric import FabricAdapter
from dbt.contracts.graph.nodes import ConstraintType

from dbt.adapters.sqlserver.relation_configs import SQLServerIndexConfig
from dbt.adapters.sqlserver.sqlserver_column import SQLServerColumn
from dbt.adapters.sqlserver.sqlserver_configs import SQLServerConfigs
from dbt.adapters.sqlserver.sqlserver_connections import SQLServerConnectionManager
from dbt.adapters.sqlserver.sqlserver_relation import SQLServerRelation

Expand All @@ -18,6 +20,7 @@ class SQLServerAdapter(FabricAdapter):
ConnectionManager = SQLServerConnectionManager
Column = SQLServerColumn
Relation = SQLServerRelation
AdapterSpecificConfigs = SQLServerConfigs

CONSTRAINT_SUPPORT = {
ConstraintType.check: ConstraintSupport.ENFORCED,
Expand Down Expand Up @@ -59,3 +62,7 @@ def render_model_constraint(cls, constraint) -> Optional[str]:
@classmethod
def date_function(cls):
return "getdate()"

@available
def parse_index(self, raw_index: Any) -> Optional[SQLServerIndexConfig]:
return SQLServerIndexConfig.parse(raw_index)
5 changes: 4 additions & 1 deletion dbt/adapters/sqlserver/sqlserver_configs.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from dataclasses import dataclass
from typing import Optional, Tuple

from dbt.adapters.fabric import FabricConfigs

from dbt.adapters.sqlserver.relation_configs import SQLServerIndexConfig


@dataclass
class SQLServerConfigs(FabricConfigs):
pass
indexes: Optional[Tuple[SQLServerIndexConfig]] = None
18 changes: 18 additions & 0 deletions dbt/include/sqlserver/macros/adapter/indexes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,21 @@
{% endif %}
end
{% endmacro %}


{% macro sqlserver__get_create_index_sql(relation, index_dict) -%}
{%- set index_config = adapter.parse_index(index_dict) -%}
{%- set comma_separated_columns = ", ".join(index_config.columns) -%}
{%- set index_name = index_config.render(relation) -%}

{# Validations are made on the adapter class SQLServerIndexConfig to control resulting sql #}
create
{% if index_config.unique -%} unique {% endif %}{{ index_config.type }}
index "{{ index_name }}"
on {{ relation }}
({{ comma_separated_columns }})
{% if index_config.included_columns -%}
include ({{ ", ".join(index_config.included_columns) }})
{% endif %}

{%- endmacro %}
Loading