Skip to content

Commit

Permalink
writing complete
Browse files Browse the repository at this point in the history
  • Loading branch information
kylemumma committed Jun 25, 2024
1 parent 9f48db5 commit 472faaa
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 103 deletions.
24 changes: 14 additions & 10 deletions snuba/cli/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,15 +384,19 @@ def add_node(

@migrations.command()
@click.argument("storage_path", type=str)
@click.option(
"--name",
type=str,
)
def generate(storage_path: str, migration_name: Optional[str] = None) -> None:
@click.option("--name", type=str, help="optional name for the migration")
def generate(storage_path: str, name: Optional[str] = None) -> None:
"""
Given a path to modified storage yaml definition (inside of snuba repo),
creates a snuba migration for the given modifications.
The migration will be written into the local directory. The user is responsible for making
Given a path to user-modified storage.yaml definition (inside snuba/datasets/configuration/*/storages/*.py),
and an optional name for the migration,
generates a snuba migration based on the schema modifications to the storage.yaml.
Currently only column addition is supported.
The migration is generated based on the diff between HEAD and working dir. Therefore modifications to the
storage should be uncommitted in the working dir.
The generated migration will be written into the local directory. The user is responsible for making
the commit, PR, and merging.
"""
expected_pattern = r"(.+/)?snuba/datasets/configuration/.*/storages/.*\.(yml|yaml)"
Expand All @@ -401,5 +405,5 @@ def generate(storage_path: str, migration_name: Optional[str] = None) -> None:
f"Storage path {storage_path} does not match expected pattern {expected_pattern}"
)

autogeneration.generate(storage_path, migration_name=migration_name)
click.echo("This function is under construction.")
path = autogeneration.generate(storage_path, migration_name=name)
click.echo(f"Migration successfully generated at {path}")
76 changes: 10 additions & 66 deletions snuba/migrations/autogeneration/diff.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
import os
from typing import Any, Sequence, cast

import yaml
from black import Mode, format_str # type: ignore

from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.configuration.utils import parse_columns
from snuba.migrations import group_loader
from snuba.migrations.columns import MigrationModifiers
from snuba.migrations.operations import AddColumn, DropColumn, OperationTarget
from snuba.utils.schemas import Column, ColumnType, SchemaModifiers
Expand All @@ -16,27 +11,21 @@
"""


def generate_python_migration(
    oldstorage: dict[str, Any], newstorage: dict[str, Any]
) -> str:
    """
    Input:
        oldstorage, newstorage - two storage.yaml definitions already parsed
        with yaml.safe_load, representing the original and modified versions
        of the same storage (i.e. the diff of a modified storage.yaml).

    Generates and returns the text of a python migration based on the changes.
    Raises ValueError (from the diffing step) for unsupported modifications.
    """
    # Diff the two storage definitions into forward/backward column operations,
    # then render those operations as a migration source file.
    forwards, backwards = _storage_diff_to_migration_ops(oldstorage, newstorage)
    return _migration_ops_to_migration(forwards, backwards)


def storage_diff_to_migration_ops(
def _storage_diff_to_migration_ops(
oldstorage: dict[str, Any], newstorage: dict[str, Any]
) -> tuple[list[AddColumn], list[DropColumn]]:
"""
Expand Down Expand Up @@ -119,51 +108,6 @@ def backwards_ops(self) -> Sequence[operations.SqlOperation]:
"""


def _write_migration(
    migration: str,
    storage_set: StorageSetKey,
    migration_name: str,
) -> str:
    """
    Input:
        migration - python migration file contents (see snuba/snuba_migrations/*/000x_*.py for examples)
        storage_set - the key of the storage-set you are writing the migration for
        migration_name - suffix used in the generated file name

    Writes the given migration to a new, sequentially-numbered file at the
    correct place in the repo (determined by the storage-set key) and returns
    the path it wrote.
    """
    # make sure storage_set migration path exists
    path = "snuba/snuba_migrations/" + storage_set.value
    if not os.path.exists(path):
        raise ValueError(
            f"Migration path: '{path}' does not exist, perhaps '{storage_set.value}' is not a valid storage set key?"
        )

    # grab the group_loader for the storage set (loader class names are the
    # CamelCase form of the storage set key, e.g. "events" -> "EventsLoader")
    group_loader_name = (
        "".join([word.capitalize() for word in storage_set.value.split("_")]) + "Loader"
    )
    loader = getattr(group_loader, group_loader_name)()
    assert isinstance(loader, group_loader.GroupLoader)

    # get the next migration number; 0 when the group has no migrations yet.
    # BUG FIX: previously existing_migrations[-1] was indexed unconditionally,
    # so the empty-group branch was dead code and raised IndexError instead.
    existing_migrations = loader.get_migrations()
    if not existing_migrations:
        nextnum = 0
    else:
        nextnum = int(existing_migrations[-1].split("_")[0]) + 1

    # write migration to file
    newpath = f"{path}/{str(nextnum).zfill(4)}_{migration_name}.py"
    if os.path.exists(newpath):
        # this should never happen, but just in case
        raise ValueError(
            f"Error: The migration number {nextnum} was larger than the last migration in the group loader '{group_loader_name}', but the migration already exists"
        )
    with open(newpath, "w") as f:
        f.write(format_str(migration, mode=Mode()))
    return newpath


def _is_valid_add_column(
oldstorage: dict[str, Any], newstorage: dict[str, Any]
) -> tuple[bool, str]:
Expand Down
70 changes: 62 additions & 8 deletions snuba/migrations/autogeneration/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,29 @@
import subprocess
from typing import Optional

from snuba.migrations.autogeneration.diff import generate_migration
from black import Mode, format_str # type: ignore
from yaml import safe_load

from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import group_loader
from snuba.migrations.autogeneration.diff import generate_python_migration


def generate(storage_path: str, migration_name: Optional[str] = None) -> str:
    """
    Given a path to a storage yaml with uncommitted schema modifications,
    generates the corresponding migration and writes it into the repo.

    Input:
        storage_path - path to the modified storage yaml (inside the snuba repo)
        migration_name - optional name suffix for the generated migration file;
            defaults to "generated_migration"

    Returns the path of the newly written migration file.
    """
    # load into memory the working-dir version of the storage and the version at HEAD
    tmpnew, tmpold = get_working_and_head(storage_path)
    new_storage = safe_load(tmpnew)
    old_storage = safe_load(tmpold)

    # generate the migration based on the diff between the two versions
    migration = generate_python_migration(old_storage, new_storage)

    # write the migration and return the path
    return write_migration(
        migration,
        StorageSetKey(new_storage["storage"]["set_key"]),
        migration_name if migration_name else "generated_migration",
    )


def get_working_and_head(path: str) -> tuple[str, str]:
Expand Down Expand Up @@ -58,3 +67,48 @@ def get_working_and_head(path: str) -> tuple[str, str]:
working_file = f.read()

return (working_file, head_file)


def write_migration(
    migration: str,
    storage_set: StorageSetKey,
    migration_name: str,
) -> str:
    """
    Input:
        migration - python migration file contents (see snuba/snuba_migrations/*/000x_*.py for examples)
        storage_set - the key of the storage-set you are writing the migration for
        migration_name - suffix used in the generated file name

    Writes the given migration (formatted with black) to a new,
    sequentially-numbered file at the correct place in the repo, which is
    determined by the storage-set key, and returns the path it wrote.
    """
    # make sure storage_set migration path exists
    path = "snuba/snuba_migrations/" + storage_set.value
    if not os.path.exists(path):
        raise ValueError(
            f"Migration path: '{path}' does not exist, perhaps '{storage_set.value}' is not a valid storage set key?"
        )

    # grab the group_loader for the storage set (loader class names are the
    # CamelCase form of the storage set key, e.g. "events" -> "EventsLoader")
    group_loader_name = (
        "".join([word.capitalize() for word in storage_set.value.split("_")]) + "Loader"
    )
    loader = getattr(group_loader, group_loader_name)()
    assert isinstance(loader, group_loader.GroupLoader)

    # get the next migration number; 0 when the group has no migrations yet.
    # BUG FIX: previously existing_migrations[-1] was indexed unconditionally,
    # so the empty-group branch was dead code and raised IndexError instead.
    existing_migrations = loader.get_migrations()
    if not existing_migrations:
        nextnum = 0
    else:
        nextnum = int(existing_migrations[-1].split("_")[0]) + 1

    # write migration to file
    newpath = f"{path}/{str(nextnum).zfill(4)}_{migration_name}.py"
    if os.path.exists(newpath):
        # this should never happen, but just in case
        raise ValueError(
            f"Error: The migration number {nextnum} was larger than the last migration in the group loader '{group_loader_name}', but the migration already exists"
        )
    with open(newpath, "w") as f:
        f.write(format_str(migration, mode=Mode()))
    return newpath
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import re
from typing import Any

import pytest
import yaml
from black import Mode, format_str # type: ignore

from snuba.migrations.autogeneration.diff import generate_migration
from snuba.migrations.autogeneration.diff import generate_python_migration


def mockstoragewithcolumns(cols: list[str]) -> str:
def mockstoragewithcolumns(cols: list[str]) -> Any:
colstr = ",\n ".join([s for s in cols])
return f"""
storage = f"""
version: v1
kind: writable_storage
name: errors
Expand All @@ -30,6 +31,7 @@ def mockstoragewithcolumns(cols: list[str]) -> str:
local_table_name: errors_local
dist_table_name: errors_dist
"""
return yaml.safe_load(storage)


def test_add_column() -> None:
Expand Down Expand Up @@ -146,19 +148,12 @@ def backwards_ops(self) -> Sequence[operations.SqlOperation]:
),
]
"""
written_path = generate_migration(
mockstoragewithcolumns(cols),
mockstoragewithcolumns(new_cols),
migration_name="kyles_migration",
migration = generate_python_migration(
mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
)
assert re.match(
r"snuba/snuba_migrations/events/[0-9][0-9][0-9][0-9]_kyles_migration.py",
written_path,
assert format_str(migration, mode=Mode()) == format_str(
expected_migration, mode=Mode()
)
with open(written_path, mode="r") as f:
assert format_str(f.read(), mode=Mode()) == format_str(
expected_migration, mode=Mode()
)


def test_modify_column() -> None:
Expand All @@ -172,7 +167,7 @@ def test_modify_column() -> None:
ValueError,
match="Modification to columns in unsupported, column 'timestamp' was modified or reordered",
):
generate_migration(
generate_python_migration(
mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
)

Expand All @@ -190,7 +185,7 @@ def test_reorder_columns() -> None:
ValueError,
match="Modification to columns in unsupported, column 'timestamp' was modified or reordered",
):
generate_migration(
generate_python_migration(
mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
)

Expand All @@ -207,6 +202,6 @@ def test_delete_column() -> None:
"{ name: newcol1, type: DateTime }",
]
with pytest.raises(ValueError, match="Column removal is not supported"):
generate_migration(
generate_python_migration(
mockstoragewithcolumns(cols), mockstoragewithcolumns(new_cols)
)
18 changes: 17 additions & 1 deletion tests/migrations/autogeneration/test_ui.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os
import subprocess
from glob import glob

from snuba.migrations.autogeneration.main import get_working_and_head
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations.autogeneration.main import get_working_and_head, write_migration


def test_get_working_and_head() -> None:
Expand Down Expand Up @@ -44,3 +46,17 @@ def test_get_working_and_head() -> None:
new_storage, old_storage = get_working_and_head(os.path.join(dir, fname))
assert new_storage == "hello world\ngoodbye world"
assert old_storage == "hello world\n"


def test_write_migration() -> None:
    """write_migration writes the given content under the storage set's
    migration directory and returns the path it wrote."""
    content = "bllop"
    name = "kyles_migration"
    # Use the returned path directly instead of re-discovering the file with
    # glob: sorted(glob(...))[0] could pick up a stale file from a previous
    # failed run, and an empty glob raised IndexError *before* the try block,
    # leaking the freshly written migration.
    written_migration = write_migration(content, StorageSetKey.EVENTS, name)
    try:
        assert written_migration.endswith(f"_{name}.py")
        with open(written_migration) as f:
            assert f.read().strip() == content
    finally:
        os.remove(written_migration)

0 comments on commit 472faaa

Please sign in to comment.