Skip to content

fix(updating): exclude deleted paths on update #1719

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions copier/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,15 @@
)
from .subproject import Subproject
from .template import Task, Template
from .tools import OS, Style, cast_to_bool, normalize_git_path, printf, readlink
from .tools import (
OS,
Style,
cast_to_bool,
escape_git_path,
normalize_git_path,
printf,
readlink,
)
from .types import (
MISSING,
AnyByStrDict,
Expand Down Expand Up @@ -933,6 +941,36 @@ def _apply_update(self) -> None: # noqa: C901
self._execute_tasks(
self.template.migration_tasks("before", self.subproject.template) # type: ignore[arg-type]
)
with local.cwd(old_copy):
self._git_initialize_repo()
git("remote", "add", "real_dst", "file://" + str(subproject_top))
git("fetch", "--depth=1", "real_dst", "HEAD")
# Save a list of files that were intentionally removed in the generated
# project to avoid recreating them during the update.
# Files listed in `skip_if_exists` should only be skipped if they exist.
# They should even be recreated if deleted intentionally.
files_removed = git(
"diff-tree",
"-r",
"--diff-filter=D",
"--name-only",
"HEAD...FETCH_HEAD",
).splitlines()
exclude_plus_removed = list(
set(self.exclude).union(
map(
escape_git_path,
map(
normalize_git_path,
(
path
for path in files_removed
if not self.match_skip(path)
),
),
)
)
)
# Create a copy of the real destination after applying migrations
# but before performing any further update for extracting the diff
# between the temporary destination of the old template and the
Expand All @@ -954,6 +992,8 @@ def _apply_update(self) -> None: # noqa: C901
# Do a normal update in final destination
with replace(
self,
# Don't regenerate intentionally deleted paths
exclude=exclude_plus_removed,
# Files can change due to the historical diff, and those
# changes are not detected in this process, so it's better to
# say nothing than lie.
Expand All @@ -970,6 +1010,7 @@ def _apply_update(self) -> None: # noqa: C901
defaults=True,
quiet=True,
src_path=self.subproject.template.url, # type: ignore[union-attr]
exclude=exclude_plus_removed,
) as new_worker:
new_worker.run_copy()
with local.cwd(new_copy):
Expand All @@ -978,7 +1019,6 @@ def _apply_update(self) -> None: # noqa: C901
# real destination with some special handling of newly added files
# in both the project and the template.
with local.cwd(old_copy):
self._git_initialize_repo()
git("remote", "add", "dst_copy", "file://" + str(dst_copy))
git("fetch", "--depth=1", "dst_copy", "HEAD:dst_copy")
git("remote", "add", "new_copy", "file://" + str(new_copy))
Expand Down
34 changes: 28 additions & 6 deletions copier/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import colorama
from packaging.version import Version
from pathspec.patterns.gitwildmatch import GitWildMatchPattern
from pydantic import StrictBool

colorama.just_fix_windows_console()
Expand Down Expand Up @@ -184,17 +185,14 @@ def readlink(link: Path) -> Path:
return Path(os.readlink(link))


_re_octal = re.compile(r"\\([0-9]{3})\\([0-9]{3})")


def _re_octal_replace(match: re.Match[str]) -> str:
return bytes([int(match.group(1), 8), int(match.group(2), 8)]).decode("utf8")
_re_whitespace = re.compile(r"^\s+|\s+$")


def normalize_git_path(path: str) -> str:
r"""Convert weird characters returned by Git to normal UTF-8 path strings.

A filename like âñ will be reported by Git as "\\303\\242\\303\\261" (octal notation).
Similarly, a filename like "<tab>foo\b<lf>ar" will be reported as "\tfoo\\b\nar".
This can be disabled with `git config core.quotepath off`.

Args:
Expand All @@ -208,5 +206,29 @@ def normalize_git_path(path: str) -> str:
path = path[1:-1]
# Repair double-quotes
path = path.replace('\\"', '"')
# Unescape escape characters
path = path.encode("latin-1", "backslashreplace").decode("unicode-escape")
# Convert octal to utf8
return _re_octal.sub(_re_octal_replace, path)
return path.encode("latin-1", "backslashreplace").decode("utf-8")


def escape_git_path(path: str) -> str:
    """Turn a plain path into a gitwildmatch pattern that matches it literally.

    The input is expected to be free of Git's own quoting already. Paths that
    come straight from a Git command should be passed through
    ``normalize_git_path`` first.

    Args:
        path: The Git path to escape.

    Returns:
        str: The escaped Git path.
    """

    def _backslash_each(match: re.Match[str]) -> str:
        # Prefix every single character of the matched whitespace run
        # with its own backslash.
        return "".join(f"\\{char}" for char in match.group())

    # GitWildMatchPattern.escape leaves backslashes alone, so they are doubled
    # by hand before the remaining special characters get escaped.
    doubled = path.replace("\\", "\\\\")
    escaped = GitWildMatchPattern.escape(doubled)
    # Whitespace matched by ``_re_whitespace`` is not handled by
    # GitWildMatchPattern.escape either; escape it character by character.
    return _re_whitespace.sub(_backslash_each, escaped)
5 changes: 3 additions & 2 deletions docs/configuring.md
Original file line number Diff line number Diff line change
Expand Up @@ -1312,8 +1312,9 @@ configuring `secret: true` in the [advanced prompt format][advanced-prompt-forma
- CLI flags: `-s`, `--skip`
- Default value: `[]`

[Patterns][patterns-syntax] for files/folders that must be skipped if they already
exist.
[Patterns][patterns-syntax] for files/folders that must be skipped if they already
exist, but that must always be present. If they do not exist in a project during an
`update` operation, they will be recreated.

!!! example

Expand Down
10 changes: 10 additions & 0 deletions docs/updating.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,16 @@ As you can see here, `copier` does several things:
- Finally, it re-applies the previously obtained diff and then runs the
post-migrations.

### Handling of deleted paths

Template-based files/directories that were deleted in the generated project are
automatically excluded from updates. If you want to recover such a file later on, you
can run `copier recopy` and recommit it to your repository. Subsequent updates will
then be applied to that path again.

An exception to this behavior applies to paths that are matched by `skip_if_exists`.
Their presence is always ensured, even during an `update` operation.

### Recover from a broken update

Usually Copier will replay the last project generation without problems. However,
Expand Down
8 changes: 8 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ def test_temporary_directory_with_git_repo_deletion() -> None:
('quo\\"tes', 'quo"tes'),
('"surrounded"', "surrounded"),
("m4\\303\\2424\\303\\2614a", "m4â4ñ4a"),
("tab\\t", "tab\t"),
("lf\\n", "lf\n"),
("crlf\\r\\n", "crlf\r\n"),
("back\\\\slash", "back\\slash"),
(
"\\a\\b\\f\\n\\t\\vcontrol\\a\\b\\f\\n\\t\\vcharacters\\a\\b\\f\\n\\t\\v",
"\a\b\f\n\t\vcontrol\a\b\f\n\t\vcharacters\a\b\f\n\t\v",
),
],
)
def test_normalizing_git_paths(path: str, normalized: str) -> None:
Expand Down
188 changes: 188 additions & 0 deletions tests/test_updatediff.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,194 @@ def test_skip_update(tmp_path_factory: pytest.TempPathFactory) -> None:
assert not (dst / "skip_me.rej").exists()


@pytest.mark.parametrize(
    "file_name",
    (
        "skip_normal_file",
        pytest.param(
            "skip_unicode_âñ",
            marks=pytest.mark.xfail(
                platform.system() in {"Darwin", "Windows"},
                reason="OS without proper UTF-8 filesystem.",
            ),
        ),
        "skip file with whitespace",
        " skip_leading_whitespace",
        "skip_trailing_whitespace ",
        " skip_multi_whitespace ",
        pytest.param(
            "\tskip_other_whitespace\t\\t",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "\a\f\n\t\vskip_control\a\f\n\t\vcharacters\v\t\n\f\a",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "skip_back\\slash",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "!skip_special",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
    ),
)
def test_skip_update_deleted(
    file_name: str, tmp_path_factory: pytest.TempPathFactory
) -> None:
    """Check that ``skip_if_exists`` paths missing from a project return on update.

    The parametrized file name always contains ``skip``, so it is matched by
    the ``*skip*`` pattern configured in the template.
    """
    src = tmp_path_factory.mktemp("src")
    dst = tmp_path_factory.mktemp("dst")

    # Build the template and tag it as 1.0.0.
    template_files = {
        "copier.yaml": "_skip_if_exists: ['*skip*']",
        "{{ _copier_conf.answers_file }}.jinja": "{{ _copier_answers|to_yaml }}",
        file_name: "1",
        "another_file": "foobar",
    }
    with local.cwd(src):
        build_file_tree(template_files)
        git("init")
        git("add", ".")
        git("commit", "-m1")
        git("tag", "1.0.0")

    # Generate the project and make sure the file under test was rendered.
    run_copy(str(src), dst, defaults=True, overwrite=True)
    recreated_path = dst / file_name
    recorded_answers = yaml.safe_load((dst / ".copier-answers.yml").read_text())
    assert recreated_path.read_text() == "1"
    assert recorded_answers["_commit"] == "1.0.0"

    # Remove the file and commit the project without it.
    recreated_path.unlink()
    with local.cwd(dst):
        git("init")
        git("add", ".")
        git("commit", "-m1")

    # The update has to bring the file back with its template content.
    run_update(dst, overwrite=True)
    assert recreated_path.exists()
    assert recreated_path.read_text() == "1"


@pytest.mark.parametrize(
    "file_name",
    (
        "normal_file",
        pytest.param(
            "unicode_âñ",
            marks=pytest.mark.xfail(
                platform.system() in {"Darwin", "Windows"},
                reason="OS without proper UTF-8 filesystem.",
            ),
        ),
        "file with whitespace",
        " leading_whitespace",
        "trailing_whitespace ",
        " multi_whitespace ",
        pytest.param(
            "\tother_whitespace\t\\t",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            # This name is shaped around two known limitations that would
            # otherwise break the test:
            # * Jinja rendering turns \r inside path segment names into \n,
            #   so the rendered file would end up with a different name.
            # * The pathspec lib only accounts for an escaped space at the
            #   end of a pattern, not for other kinds of escaped whitespace.
            #   With trailing control characters that .strip() would remove,
            #   the pattern would end in the backslash meant to escape them.
            "\a\f\n\t\vcontrol\a\f\n\t\vcharacters\v\t\n\f\a",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "back\\slash",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "!special",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
        pytest.param(
            "dont_wildmatch*",
            marks=pytest.mark.skipif(
                platform.system() == "Windows",
                reason="Disallowed characters in file name",
            ),
        ),
    ),
)
def test_update_deleted_path(
    file_name: str, tmp_path_factory: pytest.TempPathFactory
) -> None:
    """Check that a path deleted from the project stays deleted after an update.

    The template modifies the deleted path between versions; the update must
    still not regenerate it, while other files keep receiving updates.
    """
    src = tmp_path_factory.mktemp("src")
    dst = tmp_path_factory.mktemp("dst")

    # Template version 1.0.0.
    with local.cwd(src):
        build_file_tree(
            {
                "{{ _copier_conf.answers_file }}.jinja": "{{ _copier_answers|to_yaml }}",
                file_name: "foo",
                "another_file": "foobar",
                "dont_wildmatch": "bar",
            }
        )
        git("init")
        git("add", ".")
        git("commit", "-m1")
        git("tag", "1.0.0")

    # Generate the project and verify its initial content.
    run_copy(str(src), dst, defaults=True, overwrite=True)
    deleted_path = dst / file_name
    # Control file that must keep being updated; with the ``dont_wildmatch*``
    # parameter it shows the deleted name is not applied as a wildcard.
    control_file = dst / "dont_wildmatch"
    recorded_answers = yaml.safe_load((dst / ".copier-answers.yml").read_text())
    assert control_file.read_text() == "bar"
    assert deleted_path.read_text() == "foo"
    assert recorded_answers["_commit"] == "1.0.0"

    # Delete the file in the project and commit the result.
    deleted_path.unlink()
    with local.cwd(dst):
        git("init")
        git("add", ".")
        git("commit", "-m1")

    # Template version 2.0.0 changes both the deleted file and the control file.
    with local.cwd(src):
        build_file_tree({file_name: "bar", "dont_wildmatch": "baz"})
        git("commit", "-am2")
        git("tag", "2.0.0")

    # Only the control file may change; the deleted path must stay absent.
    run_update(dst, overwrite=True)
    assert control_file.exists()
    assert control_file.read_text() == "baz"
    assert not deleted_path.exists()


@pytest.mark.parametrize(
"answers_file", [None, ".copier-answers.yml", ".custom.copier-answers.yaml"]
)
Expand Down
Loading