@@ -41,6 +41,15 @@ static struct survey_refs_wanted default_ref_options = {
4141struct survey_opts {
4242 int verbose ;
4343 int show_progress ;
44+
45+ int show_largest_commits_by_nr_parents ;
46+ int show_largest_commits_by_size_bytes ;
47+
48+ int show_largest_trees_by_nr_entries ;
49+ int show_largest_trees_by_size_bytes ;
50+
51+ int show_largest_blobs_by_size_bytes ;
52+
4453 int top_nr ;
4554 struct survey_refs_wanted refs ;
4655};
@@ -138,6 +147,87 @@ static void incr_obj_hist_bin(struct obj_hist_bin *pbin,
138147 pbin -> cnt_seen ++ ;
139148}
140149
150+ /*
151+ * Remember the largest n objects for some scaling dimension. This
152+ * could be the observed object size or number of entries in a tree.
153+ * We'll use this to generate a sorted vector in the output for that
154+ * dimension.
155+ */
156+ struct large_item {
157+ uint64_t size ;
158+ struct object_id oid ;
159+ };
160+
161+ struct large_item_vec {
162+ char * dimension_label ;
163+ char * item_label ;
164+ uint64_t nr_items ;
165+ struct large_item items [FLEX_ARRAY ]; /* nr_items */
166+ };
167+
168+ static struct large_item_vec * alloc_large_item_vec (const char * dimension_label ,
169+ const char * item_label ,
170+ uint64_t nr_items )
171+ {
172+ struct large_item_vec * vec ;
173+ size_t flex_len = nr_items * sizeof (struct large_item );
174+
175+ if (!nr_items )
176+ return NULL ;
177+
178+ vec = xcalloc (1 , (sizeof (struct large_item_vec ) + flex_len ));
179+ vec -> dimension_label = strdup (dimension_label );
180+ vec -> item_label = strdup (item_label );
181+ vec -> nr_items = nr_items ;
182+
183+ return vec ;
184+ }
185+
186+ static void free_large_item_vec (struct large_item_vec * vec )
187+ {
188+ if (!vec )
189+ return ;
190+
191+ free (vec -> dimension_label );
192+ free (vec -> item_label );
193+ free (vec );
194+ }
195+
196+ static void maybe_insert_large_item (struct large_item_vec * vec ,
197+ uint64_t size ,
198+ struct object_id * oid )
199+ {
200+ size_t rest_len ;
201+ size_t k ;
202+
203+ if (!vec || !vec -> nr_items )
204+ return ;
205+
206+ /*
207+ * Since the odds an object being among the largest n
208+ * is small, shortcut and see if it is smaller than
209+ * the smallest one in our set and quickly reject it.
210+ */
211+ if (size < vec -> items [vec -> nr_items - 1 ].size )
212+ return ;
213+
214+ for (k = 0 ; k < vec -> nr_items ; k ++ ) {
215+ if (size < vec -> items [k ].size )
216+ continue ;
217+
218+ /* push items[k..] down one and insert it here */
219+
220+ rest_len = (vec -> nr_items - k - 1 ) * sizeof (struct large_item );
221+ if (rest_len )
222+ memmove (& vec -> items [k + 1 ], & vec -> items [k ], rest_len );
223+
224+ memset (& vec -> items [k ], 0 , sizeof (struct large_item ));
225+ vec -> items [k ].size = size ;
226+ oidcpy (& vec -> items [k ].oid , oid );
227+ return ;
228+ }
229+ }
230+
141231/*
142232 * Common fields for any type of object.
143233 */
@@ -183,6 +273,9 @@ struct survey_stats_commits {
183273 * Count of commits with k parents.
184274 */
185275 uint32_t parent_cnt_pbin [PBIN_VEC_LEN ];
276+
277+ struct large_item_vec * vec_largest_by_nr_parents ;
278+ struct large_item_vec * vec_largest_by_size_bytes ;
186279};
187280
188281/*
@@ -192,11 +285,18 @@ struct survey_stats_trees {
192285 struct survey_stats_base_object base ;
193286
194287 /*
195- * In the following, nr_entries refers to the number of files or
196- * subdirectories in a tree. We are interested in how wide the
197- * tree is and if the repo has gigantic directories.
288+ * Keep a vector of the trees with the most number of entries.
289+ * This gives us a feel for the width of a tree when there are
290+ * gigantic directories.
198291 */
199- uint64_t max_entries ; /* max(nr_entries) -- the width of the largest tree */
292+ struct large_item_vec * vec_largest_by_nr_entries ;
293+
294+ /*
295+ * Keep a vector of the trees with the largest size in bytes.
296+ * The contents of this may or may not match items in the other
297+ * vector, since entryname length can alter the results.
298+ */
299+ struct large_item_vec * vec_largest_by_size_bytes ;
200300
201301 /*
202302 * Computing the sum of the number of entries across all trees
@@ -216,6 +316,11 @@ struct survey_stats_trees {
216316 */
217317struct survey_stats_blobs {
218318 struct survey_stats_base_object base ;
319+
320+ /*
321+ * Remember the OIDs of the largest n blobs.
322+ */
323+ struct large_item_vec * vec_largest_by_size_bytes ;
219324};
220325
221326struct survey_report_object_summary {
@@ -396,6 +501,12 @@ struct survey_context {
396501
397502static void clear_survey_context (struct survey_context * ctx )
398503{
504+ free_large_item_vec (ctx -> report .reachable_objects .commits .vec_largest_by_nr_parents );
505+ free_large_item_vec (ctx -> report .reachable_objects .commits .vec_largest_by_size_bytes );
506+ free_large_item_vec (ctx -> report .reachable_objects .trees .vec_largest_by_nr_entries );
507+ free_large_item_vec (ctx -> report .reachable_objects .trees .vec_largest_by_size_bytes );
508+ free_large_item_vec (ctx -> report .reachable_objects .blobs .vec_largest_by_size_bytes );
509+
399510 ref_array_clear (& ctx -> ref_array );
400511 strvec_clear (& ctx -> refs );
401512}
@@ -608,6 +719,32 @@ static void survey_report_commit_parents(struct survey_context *ctx)
608719 clear_table (& table );
609720}
610721
722+ static void survey_report_largest_vec (struct large_item_vec * vec )
723+ {
724+ struct survey_table table = SURVEY_TABLE_INIT ;
725+ struct strbuf size = STRBUF_INIT ;
726+
727+ if (!vec || !vec -> nr_items )
728+ return ;
729+
730+ table .table_name = vec -> dimension_label ;
731+ strvec_pushl (& table .header , "Size" , "OID" , NULL );
732+
733+ for (size_t k = 0 ; k < vec -> nr_items ; k ++ ) {
734+ struct large_item * pk = & vec -> items [k ];
735+ if (!is_null_oid (& pk -> oid )) {
736+ strbuf_reset (& size );
737+ strbuf_addf (& size , "%" PRIuMAX , (uintmax_t )pk -> size );
738+
739+ insert_table_rowv (& table , size .buf , oid_to_hex (& pk -> oid ), NULL );
740+ }
741+ }
742+ strbuf_release (& size );
743+
744+ print_table_plaintext (& table );
745+ clear_table (& table );
746+ }
747+
611748static void survey_report_plaintext_refs (struct survey_context * ctx )
612749{
613750 struct survey_report_ref_summary * refs = & ctx -> report .refs ;
@@ -787,6 +924,12 @@ static void survey_report_plaintext(struct survey_context *ctx)
787924 & ctx -> report .top_paths_by_inflate [REPORT_TYPE_TREE ]);
788925 survey_report_plaintext_sorted_size (
789926 & ctx -> report .top_paths_by_inflate [REPORT_TYPE_BLOB ]);
927+
928+ survey_report_largest_vec (ctx -> report .reachable_objects .commits .vec_largest_by_nr_parents );
929+ survey_report_largest_vec (ctx -> report .reachable_objects .commits .vec_largest_by_size_bytes );
930+ survey_report_largest_vec (ctx -> report .reachable_objects .trees .vec_largest_by_nr_entries );
931+ survey_report_largest_vec (ctx -> report .reachable_objects .trees .vec_largest_by_size_bytes );
932+ survey_report_largest_vec (ctx -> report .reachable_objects .blobs .vec_largest_by_size_bytes );
790933}
791934
792935/*
@@ -858,6 +1001,27 @@ static int survey_load_config_cb(const char *var, const char *value,
8581001 ctx -> opts .show_progress = git_config_bool (var , value );
8591002 return 0 ;
8601003 }
1004+ if (!strcmp (var , "survey.showcommitparents" )) {
1005+ ctx -> opts .show_largest_commits_by_nr_parents = git_config_ulong (var , value , cctx -> kvi );
1006+ return 0 ;
1007+ }
1008+ if (!strcmp (var , "survey.showcommitsizes" )) {
1009+ ctx -> opts .show_largest_commits_by_size_bytes = git_config_ulong (var , value , cctx -> kvi );
1010+ return 0 ;
1011+ }
1012+
1013+ if (!strcmp (var , "survey.showtreeentries" )) {
1014+ ctx -> opts .show_largest_trees_by_nr_entries = git_config_ulong (var , value , cctx -> kvi );
1015+ return 0 ;
1016+ }
1017+ if (!strcmp (var , "survey.showtreesizes" )) {
1018+ ctx -> opts .show_largest_trees_by_size_bytes = git_config_ulong (var , value , cctx -> kvi );
1019+ return 0 ;
1020+ }
1021+ if (!strcmp (var , "survey.showblobsizes" )) {
1022+ ctx -> opts .show_largest_blobs_by_size_bytes = git_config_ulong (var , value , cctx -> kvi );
1023+ return 0 ;
1024+ }
8611025 if (!strcmp (var , "survey.top" )) {
8621026 ctx -> opts .top_nr = git_config_bool (var , value );
8631027 return 0 ;
@@ -1068,6 +1232,9 @@ static void increment_totals(struct survey_context *ctx,
10681232
10691233 ctx -> report .reachable_objects .commits .parent_cnt_pbin [k ]++ ;
10701234 base = & ctx -> report .reachable_objects .commits .base ;
1235+
1236+ maybe_insert_large_item (ctx -> report .reachable_objects .commits .vec_largest_by_nr_parents , k , & commit -> object .oid );
1237+ maybe_insert_large_item (ctx -> report .reachable_objects .commits .vec_largest_by_size_bytes , object_length , & commit -> object .oid );
10711238 break ;
10721239 }
10731240 case OBJ_TREE : {
@@ -1087,8 +1254,8 @@ static void increment_totals(struct survey_context *ctx,
10871254
10881255 pst -> sum_entries += nr_entries ;
10891256
1090- if ( nr_entries > pst -> max_entries )
1091- pst -> max_entries = nr_entries ;
1257+ maybe_insert_large_item ( pst -> vec_largest_by_nr_entries , nr_entries , & tree -> object . oid );
1258+ maybe_insert_large_item ( pst -> vec_largest_by_size_bytes , object_length , & tree -> object . oid ) ;
10921259
10931260 qb = qbin (nr_entries );
10941261 incr_obj_hist_bin (& pst -> entry_qbin [qb ], object_length , disk_sizep );
@@ -1098,6 +1265,8 @@ static void increment_totals(struct survey_context *ctx,
10981265 }
10991266 case OBJ_BLOB :
11001267 base = & ctx -> report .reachable_objects .blobs .base ;
1268+
1269+ maybe_insert_large_item (ctx -> report .reachable_objects .blobs .vec_largest_by_size_bytes , object_length , & oids -> oid [i ]);
11011270 break ;
11021271 default :
11031272 continue ;
@@ -1304,6 +1473,14 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
13041473 OPT_BOOL_F (0 , "detached" , & ctx .opts .refs .want_detached , N_ ("include detached HEAD" ), PARSE_OPT_NONEG ),
13051474 OPT_BOOL_F (0 , "other" , & ctx .opts .refs .want_other , N_ ("include notes and stashes" ), PARSE_OPT_NONEG ),
13061475
1476+ OPT_INTEGER_F (0 , "commit-parents" , & ctx .opts .show_largest_commits_by_nr_parents , N_ ("show N largest commits by parent count" ), PARSE_OPT_NONEG ),
1477+ OPT_INTEGER_F (0 , "commit-sizes" , & ctx .opts .show_largest_commits_by_size_bytes , N_ ("show N largest commits by size in bytes" ), PARSE_OPT_NONEG ),
1478+
1479+ OPT_INTEGER_F (0 , "tree-entries" , & ctx .opts .show_largest_trees_by_nr_entries , N_ ("show N largest trees by entry count" ), PARSE_OPT_NONEG ),
1480+ OPT_INTEGER_F (0 , "tree-sizes" , & ctx .opts .show_largest_trees_by_size_bytes , N_ ("show N largest trees by size in bytes" ), PARSE_OPT_NONEG ),
1481+
1482+ OPT_INTEGER_F (0 , "blob-sizes" , & ctx .opts .show_largest_blobs_by_size_bytes , N_ ("show N largest blobs by size in bytes" ), PARSE_OPT_NONEG ),
1483+
13071484 OPT_END (),
13081485 };
13091486
@@ -1327,6 +1504,39 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
13271504
13281505 fixup_refs_wanted (& ctx );
13291506
1507+ if (ctx .opts .show_largest_commits_by_nr_parents )
1508+ ctx .report .reachable_objects .commits .vec_largest_by_nr_parents =
1509+ alloc_large_item_vec (
1510+ "largest_commits_by_nr_parents" ,
1511+ "nr_parents" ,
1512+ ctx .opts .show_largest_commits_by_nr_parents );
1513+ if (ctx .opts .show_largest_commits_by_size_bytes )
1514+ ctx .report .reachable_objects .commits .vec_largest_by_size_bytes =
1515+ alloc_large_item_vec (
1516+ "largest_commits_by_size_bytes" ,
1517+ "size" ,
1518+ ctx .opts .show_largest_commits_by_size_bytes );
1519+
1520+ if (ctx .opts .show_largest_trees_by_nr_entries )
1521+ ctx .report .reachable_objects .trees .vec_largest_by_nr_entries =
1522+ alloc_large_item_vec (
1523+ "largest_trees_by_nr_entries" ,
1524+ "nr_entries" ,
1525+ ctx .opts .show_largest_trees_by_nr_entries );
1526+ if (ctx .opts .show_largest_trees_by_size_bytes )
1527+ ctx .report .reachable_objects .trees .vec_largest_by_size_bytes =
1528+ alloc_large_item_vec (
1529+ "largest_trees_by_size_bytes" ,
1530+ "size" ,
1531+ ctx .opts .show_largest_trees_by_size_bytes );
1532+
1533+ if (ctx .opts .show_largest_blobs_by_size_bytes )
1534+ ctx .report .reachable_objects .blobs .vec_largest_by_size_bytes =
1535+ alloc_large_item_vec (
1536+ "largest_blobs_by_size_bytes" ,
1537+ "size" ,
1538+ ctx .opts .show_largest_blobs_by_size_bytes );
1539+
13301540 survey_phase_refs (& ctx );
13311541
13321542 survey_phase_objects (& ctx );
0 commit comments