diff --git a/Documentation/git-backfill.txt b/Documentation/git-backfill.txt index 9b0bae04e9d8f3..ecf2ac428cefb7 100644 --- a/Documentation/git-backfill.txt +++ b/Documentation/git-backfill.txt @@ -9,7 +9,7 @@ git-backfill - Download missing objects in a partial clone SYNOPSIS -------- [verse] -'git backfill' [--batch-size=<n>] +'git backfill' [--batch-size=<n>] [--[no-]sparse] DESCRIPTION ----------- @@ -46,6 +46,10 @@ OPTIONS from the server. This size may be exceeded by the last set of blobs seen at a given path. Default batch size is 16,000. +--[no-]sparse:: + Only download objects if they appear at a path that matches the + current sparse-checkout. + SEE ALSO -------- linkgit:git-clone[1]. diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt index a371b9e6e67b84..83bfe3d665e9fb 100644 --- a/Documentation/technical/api-path-walk.txt +++ b/Documentation/technical/api-path-walk.txt @@ -65,6 +65,14 @@ better off using the revision walk API instead. the revision walk so that the walk emits commits marked with the `UNINTERESTING` flag. +`pl`:: + This pattern list pointer allows focusing the path-walk search to + a set of patterns, only emitting paths that match the given + patterns. See linkgit:gitignore[5] or + linkgit:git-sparse-checkout[1] for details about pattern lists. + When the pattern list uses cone-mode patterns, then the path-walk + API can prune the set of paths it walks to improve performance. 
+ Examples -------- diff --git a/builtin/backfill.c b/builtin/backfill.c index ba8580e4b5e966..b6b8e093eab1e0 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -4,6 +4,7 @@ #include "parse-options.h" #include "repository.h" #include "commit.h" +#include "dir.h" #include "hex.h" #include "tree.h" #include "tree-walk.h" @@ -21,7 +22,7 @@ #include "path-walk.h" static const char * const builtin_backfill_usage[] = { - N_("git backfill [--batch-size=<n>]"), + N_("git backfill [--batch-size=<n>] [--[no-]sparse]"), NULL }; @@ -29,6 +30,7 @@ struct backfill_context { struct repository *repo; struct oid_array current_batch; size_t batch_size; + int sparse; }; static void clear_backfill_context(struct backfill_context *ctx) @@ -84,6 +86,15 @@ static int do_backfill(struct backfill_context *ctx) struct path_walk_info info = PATH_WALK_INFO_INIT; int ret; + if (ctx->sparse) { + CALLOC_ARRAY(info.pl, 1); + if (get_sparse_checkout_patterns(info.pl)) { + clear_pattern_list(info.pl); + free(info.pl); + return error(_("problem loading sparse-checkout")); + } + } + repo_init_revisions(ctx->repo, &revs, ""); handle_revision_arg("HEAD", &revs, 0, 0); @@ -102,6 +113,10 @@ static int do_backfill(struct backfill_context *ctx) clear_backfill_context(ctx); release_revisions(&revs); + if (info.pl) { + clear_pattern_list(info.pl); + free(info.pl); + } return ret; } @@ -111,10 +126,13 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit .repo = repo, .current_batch = OID_ARRAY_INIT, .batch_size = 50000, + .sparse = 0, }; struct option options[] = { OPT_INTEGER(0, "batch-size", &ctx.batch_size, N_("Minimun number of objects to request at a time")), + OPT_BOOL(0, "sparse", &ctx.sparse, + N_("Restrict the missing objects to the current sparse-checkout")), OPT_END(), }; diff --git a/dir.c b/dir.c index 5b2181e5899ce9..16ccfe7e4e868d 100644 --- a/dir.c +++ b/dir.c @@ -1093,10 +1093,6 @@ static void invalidate_directory(struct untracked_cache *uc, 
dir->dirs[i]->recurse = 0; } -static int add_patterns_from_buffer(char *buf, size_t size, - const char *base, int baselen, - struct pattern_list *pl); - /* Flags for add_patterns() */ #define PATTERN_NOFOLLOW (1<<0) @@ -1186,9 +1182,9 @@ static int add_patterns(const char *fname, const char *base, int baselen, return 0; } -static int add_patterns_from_buffer(char *buf, size_t size, - const char *base, int baselen, - struct pattern_list *pl) +int add_patterns_from_buffer(char *buf, size_t size, + const char *base, int baselen, + struct pattern_list *pl) { char *orig = buf; int i, lineno = 1; diff --git a/dir.h b/dir.h index a3a2f00f5d9273..6cfef5df66091b 100644 --- a/dir.h +++ b/dir.h @@ -467,6 +467,9 @@ void add_patterns_from_file(struct dir_struct *, const char *fname); int add_patterns_from_blob_to_list(struct object_id *oid, const char *base, int baselen, struct pattern_list *pl); +int add_patterns_from_buffer(char *buf, size_t size, + const char *base, int baselen, + struct pattern_list *pl); void parse_path_pattern(const char **string, int *patternlen, unsigned *flags, int *nowildcardlen); void add_pattern(const char *string, const char *base, int baselen, struct pattern_list *pl, int srcpos); diff --git a/path-walk.c b/path-walk.c index dd1acb29d92b45..10c6dedbab82b1 100644 --- a/path-walk.c +++ b/path-walk.c @@ -10,6 +10,7 @@ #include "hex.h" #include "object.h" #include "oid-array.h" +#include "repository.h" #include "revision.h" #include "string-list.h" #include "strmap.h" @@ -119,6 +120,23 @@ static int add_children(struct path_walk_context *ctx, if (type == OBJ_TREE) strbuf_addch(&path, '/'); + if (ctx->info->pl) { + int dtype; + enum pattern_match_result match; + match = path_matches_pattern_list(path.buf, path.len, + path.buf + base_len, &dtype, + ctx->info->pl, + ctx->repo->index); + + if (ctx->info->pl->use_cone_patterns && + match == NOT_MATCHED) + continue; + else if (!ctx->info->pl->use_cone_patterns && + type == OBJ_BLOB && + match != MATCHED) + 
continue; + } + if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) { CALLOC_ARRAY(list, 1); list->type = type; diff --git a/path-walk.h b/path-walk.h index 3e44c4b8a588e9..090cda3b5cf8f4 100644 --- a/path-walk.h +++ b/path-walk.h @@ -6,6 +6,7 @@ struct rev_info; struct oid_array; +struct pattern_list; /** * The type of a function pointer for the method that is called on a list of @@ -46,6 +47,16 @@ struct path_walk_info { * walk the children of such trees. */ int prune_all_uninteresting; + + /** + * Specify a sparse-checkout definition to match our paths to. Do not + * walk outside of this sparse definition. If the patterns are in + * cone mode, then the search may prune directories that are outside + * of the cone. If not in cone mode, then all tree paths will be + * explored but the path_fn will only be called when the path matches + * the sparse-checkout patterns. + */ + struct pattern_list *pl; }; #define PATH_WALK_INFO_INIT { \ diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index fa3bfe46b5de1c..405c0f43be1694 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -1,6 +1,7 @@ #define USE_THE_REPOSITORY_VARIABLE #include "test-tool.h" +#include "dir.h" #include "environment.h" #include "hex.h" #include "object-name.h" @@ -9,6 +10,7 @@ #include "revision.h" #include "setup.h" #include "parse-options.h" +#include "strbuf.h" #include "path-walk.h" #include "oid-array.h" @@ -67,7 +69,7 @@ static int emit_block(const char *path, struct oid_array *oids, int cmd__path_walk(int argc, const char **argv) { - int res; + int res, stdin_pl = 0; struct rev_info revs = REV_INFO_INIT; struct path_walk_info info = PATH_WALK_INFO_INIT; struct path_walk_test_data data = { 0 }; @@ -82,6 +84,8 @@ int cmd__path_walk(int argc, const char **argv) N_("toggle inclusion of tree objects")), OPT_BOOL(0, "prune", &info.prune_all_uninteresting, N_("toggle pruning of uninteresting paths")), + OPT_BOOL(0, "stdin-pl", &stdin_pl, + N_("read a 
pattern list over stdin")), OPT_END(), }; @@ -101,6 +105,17 @@ int cmd__path_walk(int argc, const char **argv) info.path_fn = emit_block; info.path_fn_data = &data; + if (stdin_pl) { + struct strbuf in = STRBUF_INIT; + CALLOC_ARRAY(info.pl, 1); + + info.pl->use_cone_patterns = 1; + + strbuf_fread(&in, 2048, stdin); + add_patterns_from_buffer(in.buf, in.len, "", 0, info.pl); + strbuf_release(&in); + } + res = walk_objects_by_path(&info); printf("commits:%" PRIuMAX "\n" @@ -109,6 +124,10 @@ int cmd__path_walk(int argc, const char **argv) "tags:%" PRIuMAX "\n", data.commit_nr, data.tree_nr, data.blob_nr, data.tag_nr); + if (info.pl) { + clear_pattern_list(info.pl); + free(info.pl); + } release_revisions(&revs); return res; } diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 32e2bb1c1327fe..c2acd1339bd454 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -77,6 +77,61 @@ test_expect_success 'do partial clone 2, backfill batch size' ' test_line_count = 0 revs2 ' +test_expect_success 'backfill --sparse' ' + git clone --sparse --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill3 && + + # Initial checkout includes four files at root. + git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 44 missing && + + # Initial sparse-checkout is just the files at root, so we get the + # older versions of the four files at tip. + GIT_TRACE2_EVENT="$(pwd)/sparse-trace1" git \ + -C backfill3 backfill --sparse && + test_trace2_data promisor fetch_count 4 <sparse-trace1 && + git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 40 missing && + + # Expand the sparse-checkout to include 'd' recursively. This + # engages the algorithm to skip the trees for 'a'. Note that + # the "sparse-checkout set" command downloads the objects at tip + # to satisfy the current checkout. 
+ git -C backfill3 sparse-checkout set d && + GIT_TRACE2_EVENT="$(pwd)/sparse-trace2" git \ + -C backfill3 backfill --sparse && + test_trace2_data promisor fetch_count 8 <sparse-trace2 && + git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 24 missing +' + +test_expect_success 'backfill --sparse without cone mode' ' + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill4 && + + # No blobs yet + git -C backfill4 rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + # Define sparse-checkout by filename regardless of parent directory. + # This downloads 6 blobs to satisfy the checkout. + git -C backfill4 sparse-checkout set --no-cone "**/file.1.txt" && + git -C backfill4 checkout main && + + GIT_TRACE2_EVENT="$(pwd)/no-cone-trace1" git \ + -C backfill4 backfill --sparse && + test_trace2_data promisor fetch_count 6 <no-cone-trace1 && + git -C backfill4 rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 36 missing +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 943adc6c8f132f..312bf3c19c176a 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -108,6 +108,41 @@ test_expect_success 'all' ' test_cmp expect.sorted out.sorted ' +test_expect_success 'base & topic, sparse' ' + cat >patterns <<-EOF && + /* + !/*/ + /left/ + EOF + + test-tool path-walk --stdin-pl -- base topic <patterns >out && + + cat >expect <<-EOF && + COMMIT::$(git rev-parse topic) + COMMIT::$(git rev-parse base) + COMMIT::$(git rev-parse base~1) + COMMIT::$(git rev-parse base~2) + commits:4 + TREE::$(git rev-parse topic^{tree}) + TREE::$(git rev-parse base^{tree}) + TREE::$(git rev-parse base~1^{tree}) + TREE::$(git rev-parse base~2^{tree}) + TREE:left/:$(git rev-parse base:left) + TREE:left/:$(git rev-parse base~2:left) + trees:6 + BLOB:a:$(git rev-parse base~2:a) + BLOB:left/b:$(git rev-parse base~2:left/b) + BLOB:left/b:$(git rev-parse base:left/b) + blobs:3 + tags:0 + EOF + + sort expect >expect.sorted && + sort out >out.sorted && 
+ + test_cmp expect.sorted out.sorted +' + test_expect_success 'topic only' ' test-tool path-walk -- topic >out &&