Skip to content

Commit 1de055f

Browse files
jeffhostetler authored and
dscho committed
survey: add commit-oid to large_item detail
Signed-off-by: Jeff Hostetler <[email protected]>
1 parent 617deae commit 1de055f

File tree

1 file changed

+53
-6
lines changed

1 file changed

+53
-6
lines changed

builtin/survey.c

+53-6
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,21 @@ static void incr_obj_hist_bin(struct obj_hist_bin *pbin,
327327
struct large_item {
328328
uint64_t size;
329329
struct object_id oid;
330+
331+
/*
332+
* For blobs and trees the name field is the pathname of the
333+
* file or directory. Root trees will have a zero-length
334+
* name. The name field is not currently used for commits.
335+
*/
330336
struct strbuf *name;
337+
338+
/*
339+
* For blobs and trees remember the transient commit from
340+
* the treewalk so that we can say that this large item
341+
* first appeared in this commit (relative to the treewalk
342+
* order).
343+
*/
344+
struct object_id containing_commit_oid;
331345
};
332346

333347
struct large_item_vec {
@@ -379,7 +393,8 @@ static void free_large_item_vec(struct large_item_vec *vec)
379393
static void maybe_insert_large_item(struct large_item_vec *vec,
380394
uint64_t size,
381395
struct object_id *oid,
382-
const char *name)
396+
const char *name,
397+
const struct object_id *containing_commit_oid)
383398
{
384399
struct strbuf *pbuf_temp;
385400
size_t rest_len;
@@ -419,6 +434,7 @@ static void maybe_insert_large_item(struct large_item_vec *vec,
419434
memset(&vec->items[k], 0, sizeof(struct large_item));
420435
vec->items[k].size = size;
421436
oidcpy(&vec->items[k].oid, oid);
437+
oidcpy(&vec->items[k].containing_commit_oid, containing_commit_oid);
422438

423439
vec->items[k].name = pbuf_temp;
424440

@@ -682,6 +698,14 @@ static int fill_in_base_object(struct survey_stats_base_object *base,
682698
return 0;
683699
}
684700

701+
/*
702+
* Transient OID of the commit currently being visited
703+
* during the treewalk. We can use this to create the
704+
* <ref>:<pathname> pair when a notable large file was
705+
* created, for example.
706+
*/
707+
static struct object_id treewalk_transient_commit_oid;
708+
685709
static void traverse_commit_cb(struct commit *commit, void *data)
686710
{
687711
struct survey_stats_commits *psc = &survey_stats.commits;
@@ -691,12 +715,23 @@ static void traverse_commit_cb(struct commit *commit, void *data)
691715
if ((++survey_progress_total % 1000) == 0)
692716
display_progress(survey_progress, survey_progress_total);
693717

718+
oidcpy(&treewalk_transient_commit_oid, &commit->object.oid);
719+
694720
fill_in_base_object(&psc->base, &commit->object, OBJ_COMMIT, &object_length, NULL);
695721

696722
k = commit_list_count(commit->parents);
697723

698-
maybe_insert_large_item(psc->vec_largest_by_nr_parents, k, &commit->object.oid, NULL);
699-
maybe_insert_large_item(psc->vec_largest_by_size_bytes, object_length, &commit->object.oid, NULL);
724+
/*
725+
* Send the commit-oid as both the OID and the CONTAINING-COMMIT-OID.
726+
* This is somewhat redundant, but lets us later do `git name-rev`
727+
* using the containing-oid in a consistent fashion.
728+
*/
729+
maybe_insert_large_item(psc->vec_largest_by_nr_parents, k,
730+
&commit->object.oid, NULL,
731+
&commit->object.oid);
732+
maybe_insert_large_item(psc->vec_largest_by_size_bytes, object_length,
733+
&commit->object.oid, NULL,
734+
&commit->object.oid);
700735

701736
if (k >= PBIN_VEC_LEN)
702737
k = PBIN_VEC_LEN - 1;
@@ -727,8 +762,12 @@ static void traverse_object_cb_tree(struct object *obj, const char *name)
727762

728763
pst->sum_entries += nr_entries;
729764

730-
maybe_insert_large_item(pst->vec_largest_by_nr_entries, nr_entries, &obj->oid, name);
731-
maybe_insert_large_item(pst->vec_largest_by_size_bytes, object_length, &obj->oid, name);
765+
maybe_insert_large_item(pst->vec_largest_by_nr_entries, nr_entries,
766+
&obj->oid, name,
767+
&treewalk_transient_commit_oid);
768+
maybe_insert_large_item(pst->vec_largest_by_size_bytes, object_length,
769+
&obj->oid, name,
770+
&treewalk_transient_commit_oid);
732771

733772
qb = qbin(nr_entries);
734773
incr_obj_hist_bin(&pst->entry_qbin[qb], object_length, disk_sizep);
@@ -741,7 +780,9 @@ static void traverse_object_cb_blob(struct object *obj, const char *name)
741780

742781
fill_in_base_object(&psb->base, obj, OBJ_BLOB, &object_length, NULL);
743782

744-
maybe_insert_large_item(psb->vec_largest_by_size_bytes, object_length, &obj->oid, name);
783+
maybe_insert_large_item(psb->vec_largest_by_size_bytes, object_length,
784+
&obj->oid, name,
785+
&treewalk_transient_commit_oid);
745786
}
746787

747788
static void traverse_object_cb(struct object *obj, const char *name, void *data)
@@ -774,6 +815,7 @@ static void do_treewalk_reachable(struct ref_array *ref_array)
774815
repo_init_revisions(the_repository, &rev_info, NULL);
775816
rev_info.tree_objects = 1;
776817
rev_info.blob_objects = 1;
818+
rev_info.tree_blobs_in_commit_order = 1;
777819
load_rev_info(&rev_info, ref_array);
778820
if (prepare_revision_walk(&rev_info))
779821
die(_("revision walk setup failed"));
@@ -783,10 +825,12 @@ static void do_treewalk_reachable(struct ref_array *ref_array)
783825
survey_progress = start_progress(_("Walking reachable objects..."), 0);
784826
}
785827

828+
oidcpy(&treewalk_transient_commit_oid, null_oid());
786829
traverse_commit_list(&rev_info,
787830
traverse_commit_cb,
788831
traverse_object_cb,
789832
NULL);
833+
oidcpy(&treewalk_transient_commit_oid, null_oid());
790834

791835
if (survey_opts.show_progress)
792836
stop_progress(&survey_progress);
@@ -1229,6 +1273,9 @@ static void write_large_item_vec_json(struct json_writer *jw,
12291273
jw_object_string(jw, "oid", oid_to_hex(&pk->oid));
12301274
if (pk->name->len)
12311275
jw_object_string(jw, "name", pk->name->buf);
1276+
if (!is_null_oid(&pk->containing_commit_oid))
1277+
jw_object_string(jw, "commit_oid",
1278+
oid_to_hex(&pk->containing_commit_oid));
12321279
}
12331280
jw_end(jw);
12341281
}

0 commit comments

Comments
 (0)