@@ -113,6 +113,11 @@ export interface TreeFile {
113113 tree : TreeNode ;
114114}
115115
/**
 * On-disk format of a "forest" file: a single JSON document that holds
 * several trees at once, so that not every subtree needs its own file.
 */
export interface ForestFile {
  /** Format marker for forest files */
  version: 'forest-0.1';

  /** The trees stored in this file, keyed by their tree ID */
  forest: { [treeId: string]: TreeNode };
}
120+
/**
 * Anything that can occupy a position in a tree: either an inline node, or
 * a reference to a subtree that is stored somewhere else.
 */
type TreeNode = SubTreeReference | Node;
117122
118123/**
@@ -121,7 +126,15 @@ type TreeNode = Node | SubTreeReference;
interface SubTreeReference {
  /** ID of the node this reference stands in for */
  readonly id: string;

  /** Path of the node this reference stands in for */
  readonly path: string;

  /**
   * Name of the file that holds the referenced subtree
   *
   * Not readonly: the writer creates references with a placeholder name and
   * assigns the real file name once subtrees have been allocated to files.
   */
  fileName: string;

  /**
   * ID of the referenced tree inside a forest file
   *
   * When present, `fileName` must point to a ForestFile and this value selects
   * the tree within that forest.
   */
  treeId?: string;
}
126139
127140/**
@@ -144,11 +157,29 @@ interface SubTreeReference {
144157 * leaves in-place to a different node type will (probably) minimally change
145158 * the size of the tree, whereas adding C more children that will all become
146159 * references to subtrees will add an unpredictable size to the tree.
160+ * - Because doing this will end up with a lot of subtrees (as many as there are leaf nodes
161+ * in the trees, which is degree ^ depth), writing those all to individual files creates
162+ * A LOT of files which is undesirable for people looking at the directory. We will
163+ * accumulate subtrees into "forest" files which each hold a set of trees, identified
164+ * by a filename and a tree ID. We allocate them by first building the individual trees,
165+ * then accumulating subtrees up to a node count into forests, and then writing the
166+ * name of the forest file and the tree ID into the node that points to the subtree.
147167 *
148168 * Here's a sense of the numbers: a project with 277k nodes leads to a 136M JSON
149169 * file (490 bytes/node). We'll estimate the size of a node to be 1000 bytes.
150170 */
151171class FragmentedTreeWriter {
172+ /**
173+ * We only care about the identity of this object.
174+ *
175+ * Whatever tree in the forest is "pointed to" by this pointer is the main tree.
176+ */
177+ private readonly mainTreePointer : SubTreeReference = {
178+ fileName : 'yyy' ,
179+ id : 'id' ,
180+ path : 'path' ,
181+ } ;
182+
152183 private readonly forest = new Array < Tree > ( ) ;
153184
154185 /**
@@ -168,8 +199,6 @@ class FragmentedTreeWriter {
168199
169200 private readonly maxNodes : number ;
170201
171- private subtreeCtr = 1 ;
172-
173202 constructor ( private readonly outdir : string , private readonly rootFilename : string , options ?: FragmentedTreeWriterOptions ) {
174203 this . maxNodes = options ?. maxNodesPerTree ?? 500_000 ;
175204 }
@@ -178,14 +207,59 @@ class FragmentedTreeWriter {
178207 * Write the forest to disk, return the root file name
179208 */
public writeForest(): string {
  // Serialize a value as JSON into the output directory.
  const writeJson = (fileName: string, contents: unknown): void => {
    fs.writeFileSync(path.join(this.outdir, fileName), JSON.stringify(contents), { encoding: 'utf-8' });
  };

  // Allocation has to happen before anything is serialized: it fills in the
  // file name and tree ID on the reference nodes that live inside the trees.
  const forestFiles = this.allocateSubTreesToForestFiles();

  // The main tree is the one referenced by the sentinel pointer; it goes to the root file.
  const mainTree = this.forest.find((candidate) => candidate.referencingNode === this.mainTreePointer);
  if (mainTree !== undefined) {
    const mainFile: TreeFile = { version: 'tree-0.1', tree: mainTree.root };
    writeJson(this.rootFilename, mainFile);
  }

  // Every other tree has been packed into one of the forest files.
  forestFiles.forEach(({ fileName, file }) => writeJson(fileName, file));

  return this.rootFilename;
}
188225
/**
 * Find all non-main trees and combine them into forest files
 *
 * Trees are packed in order: a tree goes into the most recently opened forest
 * file unless that would push the file's node count over `maxNodes`, in which
 * case a new forest file is opened. A new file always receives at least one
 * tree, so a single tree larger than `maxNodes` ends up in a file of its own.
 *
 * This will mutate the pointing nodes as a side effect: every subtree's
 * referencing node receives the forest file name and the tree ID it was
 * allocated to.
 *
 * @returns the forest files to write, in allocation order
 */
private allocateSubTreesToForestFiles(): IncompleteForestFile[] {
  const forests = new Array<IncompleteForestFile>();

  // We only ever append to the most recently opened forest, so it is enough to
  // track that one, together with the number of trees already stored in it.
  let current: IncompleteForestFile | undefined;
  let treesInCurrent = 0;

  for (const tree of this.forest) {
    if (tree.referencingNode === this.mainTreePointer) {
      // Main tree, not interesting for the purposes of allocating subtrees to forests
      continue;
    }

    if (current === undefined || current.nodeCount + tree.nodes > this.maxNodes) {
      current = {
        fileName: `trees-${forests.length + 1}.json`,
        file: { version: 'forest-0.1', forest: {} },
        nodeCount: 0,
      };
      forests.push(current);
      treesInCurrent = 0;
    }

    // Tree IDs are sequential per forest file. A running counter yields the same
    // IDs as counting the keys of the forest object, without re-enumerating all
    // keys for every tree that is added.
    const treeId = `t${treesInCurrent}`;
    treesInCurrent += 1;

    current.file.forest[treeId] = tree.root;
    current.nodeCount += tree.nodes;

    // Point the referencing node at the final location of its subtree
    tree.referencingNode.fileName = current.fileName;
    tree.referencingNode.treeId = treeId;
  }

  return forests;
}
262+
189263 public addNode ( construct : IConstruct , parent : IConstruct | undefined , node : Node ) {
190264 // NOTE: we could copy the 'node' object to be safe against tampering, but we trust
191265 // the consuming code so we know we don't need to.
@@ -195,7 +269,7 @@ class FragmentedTreeWriter {
195269 throw new AssumptionError ( 'Can only add exactly one node without a parent' ) ;
196270 }
197271
198- this . addNewTree ( node , this . rootFilename ) ;
272+ this . addNewTree ( node , this . mainTreePointer ) ;
199273 } else {
200274 // There was a provision in the old code for missing parents, so we're just going to ignore it
201275 // if we can't find a parent.
@@ -213,10 +287,10 @@ class FragmentedTreeWriter {
213287 /**
214288 * Add a new tree with the given Node as root
215289 */
216- private addNewTree ( root : Node , filename : string ) : Tree {
290+ private addNewTree ( root : Node , referencingNode : SubTreeReference ) : Tree {
217291 const tree : Tree = {
218292 root,
219- filename ,
293+ referencingNode : referencingNode ,
220294 nodes : nodeCount ( root ) ,
221295 } ;
222296
@@ -240,13 +314,15 @@ class FragmentedTreeWriter {
240314 throw new AssumptionError ( `Could not find parent of ${ JSON . stringify ( parent ) } ` ) ;
241315 }
242316
243- tree = this . addNewTree ( parent , `tree-${ this . subtreeCtr ++ } .json` ) ;
244-
245- setChild ( grandParent , {
317+ const subtreeReference : SubTreeReference = {
246318 id : parent . id ,
247319 path : parent . path ,
248- fileName : tree . filename ,
249- } satisfies SubTreeReference ) ;
320+ fileName : 'xxx' , // Will be replaced later
321+ } ;
322+
323+ tree = this . addNewTree ( parent , subtreeReference ) ;
324+
325+ setChild ( grandParent , subtreeReference ) ;
250326
251327 // To be strictly correct we should decrease the original tree's nodeCount here, because
252328 // we may have moved away any number of children as well. We don't do that; the tree
@@ -286,7 +362,7 @@ class FragmentedTreeWriter {
286362 if ( tree ) {
287363 return tree ;
288364 }
289- throw new AssumptionError ( `Could not find tree for node: ${ JSON . stringify ( node ) } , tried ${ tried } , ${ Array . from ( this . subtreeRoots ) . map ( ( [ k , v ] ) => ` ${ k . path } => ${ v . filename } ` ) } ` ) ;
365+ throw new AssumptionError ( `Could not find tree for node: ${ JSON . stringify ( node ) } , tried ${ tried } ` ) ;
290366 }
291367}
292368
@@ -329,17 +405,26 @@ interface Tree {
329405 * The root of this particular tree
330406 */
331407 root : Node ;
408+
332409 /**
333- * The filename that `root` will be serialized to
410+ * The node that is pointing to this tree
411+ *
412+ * This may be "mainTreePointer", in which case this tree indicates the main tree.
334413 */
335- filename : string ;
414+ referencingNode : SubTreeReference ;
336415
337416 /**
338417 * How many nodes are in this tree already
339418 */
340419 nodes : number ;
341420}
342421
/**
 * A forest file that is still being filled with trees, together with the
 * bookkeeping needed to decide when it is full and where to write it.
 */
type IncompleteForestFile = {
  /** File name the forest will be written to, relative to the output directory */
  fileName: string;

  /** Sum of the node counts of all trees added to this forest so far */
  nodeCount: number;

  /** The forest file contents that will be serialized */
  file: ForestFile;
};
427+
/**
 * Type guard that tells subtree references apart from inline nodes.
 *
 * A value counts as a reference when it carries a truthy `fileName`.
 */
export function isSubtreeReference(x: TreeFile['tree']): x is Extract<TreeFile['tree'], { fileName: string }> {
  const candidate = x as { fileName?: unknown };
  return Boolean(candidate.fileName);
}
0 commit comments