@@ -327,7 +327,21 @@ static void incr_obj_hist_bin(struct obj_hist_bin *pbin,
327
327
struct large_item {
328
328
uint64_t size ;
329
329
struct object_id oid ;
330
+
331
+ /*
332
+ * For blobs and trees the name field is the pathname of the
333
+ * file or directory. Root trees will have a zero-length
334
+ * name. The name field is not currently used for commits.
335
+ */
330
336
struct strbuf * name ;
337
+
338
+ /*
339
+ * For blobs and trees remember the transient commit from
340
+ * the treewalk so that we can say that this large item
341
+ * first appeared in this commit (relative to the treewalk
342
+ * order).
343
+ */
344
+ struct object_id containing_commit_oid ;
331
345
};
332
346
333
347
struct large_item_vec {
@@ -379,7 +393,8 @@ static void free_large_item_vec(struct large_item_vec *vec)
379
393
static void maybe_insert_large_item (struct large_item_vec * vec ,
380
394
uint64_t size ,
381
395
struct object_id * oid ,
382
- const char * name )
396
+ const char * name ,
397
+ const struct object_id * containing_commit_oid )
383
398
{
384
399
struct strbuf * pbuf_temp ;
385
400
size_t rest_len ;
@@ -419,6 +434,7 @@ static void maybe_insert_large_item(struct large_item_vec *vec,
419
434
memset (& vec -> items [k ], 0 , sizeof (struct large_item ));
420
435
vec -> items [k ].size = size ;
421
436
oidcpy (& vec -> items [k ].oid , oid );
437
+ oidcpy (& vec -> items [k ].containing_commit_oid , containing_commit_oid );
422
438
423
439
vec -> items [k ].name = pbuf_temp ;
424
440
@@ -682,6 +698,14 @@ static int fill_in_base_object(struct survey_stats_base_object *base,
682
698
return 0 ;
683
699
}
684
700
701
+ /*
702
+ * Transient OID of the commit currently being visited
703
+ * during the treewalk. We can use this to create the
704
+ * <ref>:<pathname> pair when a notable large file was
705
+ * created, for example.
706
+ */
707
+ static struct object_id treewalk_transient_commit_oid ;
708
+
685
709
static void traverse_commit_cb (struct commit * commit , void * data )
686
710
{
687
711
struct survey_stats_commits * psc = & survey_stats .commits ;
@@ -691,12 +715,23 @@ static void traverse_commit_cb(struct commit *commit, void *data)
691
715
if ((++ survey_progress_total % 1000 ) == 0 )
692
716
display_progress (survey_progress , survey_progress_total );
693
717
718
+ oidcpy (& treewalk_transient_commit_oid , & commit -> object .oid );
719
+
694
720
fill_in_base_object (& psc -> base , & commit -> object , OBJ_COMMIT , & object_length , NULL );
695
721
696
722
k = commit_list_count (commit -> parents );
697
723
698
- maybe_insert_large_item (psc -> vec_largest_by_nr_parents , k , & commit -> object .oid , NULL );
699
- maybe_insert_large_item (psc -> vec_largest_by_size_bytes , object_length , & commit -> object .oid , NULL );
724
+ /*
725
+ * Send the commit-oid as both the OID and the CONTAINING-COMMIT-OID.
726
+ * This is somewhat redundant, but lets us later do `git name-rev`
727
+ * using the containing-oid in a consistent fashion.
728
+ */
729
+ maybe_insert_large_item (psc -> vec_largest_by_nr_parents , k ,
730
+ & commit -> object .oid , NULL ,
731
+ & commit -> object .oid );
732
+ maybe_insert_large_item (psc -> vec_largest_by_size_bytes , object_length ,
733
+ & commit -> object .oid , NULL ,
734
+ & commit -> object .oid );
700
735
701
736
if (k >= PBIN_VEC_LEN )
702
737
k = PBIN_VEC_LEN - 1 ;
@@ -727,8 +762,12 @@ static void traverse_object_cb_tree(struct object *obj, const char *name)
727
762
728
763
pst -> sum_entries += nr_entries ;
729
764
730
- maybe_insert_large_item (pst -> vec_largest_by_nr_entries , nr_entries , & obj -> oid , name );
731
- maybe_insert_large_item (pst -> vec_largest_by_size_bytes , object_length , & obj -> oid , name );
765
+ maybe_insert_large_item (pst -> vec_largest_by_nr_entries , nr_entries ,
766
+ & obj -> oid , name ,
767
+ & treewalk_transient_commit_oid );
768
+ maybe_insert_large_item (pst -> vec_largest_by_size_bytes , object_length ,
769
+ & obj -> oid , name ,
770
+ & treewalk_transient_commit_oid );
732
771
733
772
qb = qbin (nr_entries );
734
773
incr_obj_hist_bin (& pst -> entry_qbin [qb ], object_length , disk_sizep );
@@ -741,7 +780,9 @@ static void traverse_object_cb_blob(struct object *obj, const char *name)
741
780
742
781
fill_in_base_object (& psb -> base , obj , OBJ_BLOB , & object_length , NULL );
743
782
744
- maybe_insert_large_item (psb -> vec_largest_by_size_bytes , object_length , & obj -> oid , name );
783
+ maybe_insert_large_item (psb -> vec_largest_by_size_bytes , object_length ,
784
+ & obj -> oid , name ,
785
+ & treewalk_transient_commit_oid );
745
786
}
746
787
747
788
static void traverse_object_cb (struct object * obj , const char * name , void * data )
@@ -774,6 +815,7 @@ static void do_treewalk_reachable(struct ref_array *ref_array)
774
815
repo_init_revisions (the_repository , & rev_info , NULL );
775
816
rev_info .tree_objects = 1 ;
776
817
rev_info .blob_objects = 1 ;
818
+ rev_info .tree_blobs_in_commit_order = 1 ;
777
819
load_rev_info (& rev_info , ref_array );
778
820
if (prepare_revision_walk (& rev_info ))
779
821
die (_ ("revision walk setup failed" ));
@@ -783,10 +825,12 @@ static void do_treewalk_reachable(struct ref_array *ref_array)
783
825
survey_progress = start_progress (_ ("Walking reachable objects..." ), 0 );
784
826
}
785
827
828
+ oidcpy (& treewalk_transient_commit_oid , null_oid ());
786
829
traverse_commit_list (& rev_info ,
787
830
traverse_commit_cb ,
788
831
traverse_object_cb ,
789
832
NULL );
833
+ oidcpy (& treewalk_transient_commit_oid , null_oid ());
790
834
791
835
if (survey_opts .show_progress )
792
836
stop_progress (& survey_progress );
@@ -1229,6 +1273,9 @@ static void write_large_item_vec_json(struct json_writer *jw,
1229
1273
jw_object_string (jw , "oid" , oid_to_hex (& pk -> oid ));
1230
1274
if (pk -> name -> len )
1231
1275
jw_object_string (jw , "name" , pk -> name -> buf );
1276
+ if (!is_null_oid (& pk -> containing_commit_oid ))
1277
+ jw_object_string (jw , "commit_oid" ,
1278
+ oid_to_hex (& pk -> containing_commit_oid ));
1232
1279
}
1233
1280
jw_end (jw );
1234
1281
}
0 commit comments