@@ -11,6 +11,9 @@ import (
1111 "math"
1212 "strconv"
1313 "strings"
14+
15+ "github.com/djherbis/buffer"
16+ "github.com/djherbis/nio/v3"
1417)
1518
1619// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
@@ -42,7 +45,7 @@ func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()
4245 }
4346 }()
4447
45- // For simplicities sake we'll us a buffered reader to read from the cat-file --batch
48+ // For simplicity's sake we'll use a buffered reader to read from the cat-file --batch-check
4649 batchReader := bufio .NewReader (batchStdoutReader )
4750
4851 return batchStdinWriter , batchReader , cancel
@@ -53,7 +56,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
5356 // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
5457 // so let's create a batch stdin and stdout
5558 batchStdinReader , batchStdinWriter := io .Pipe ()
56- batchStdoutReader , batchStdoutWriter := io .Pipe ()
59+ batchStdoutReader , batchStdoutWriter := nio .Pipe (buffer . New ( 32 * 1024 ) )
5760 cancel := func () {
5861 _ = batchStdinReader .Close ()
5962 _ = batchStdinWriter .Close ()
@@ -74,7 +77,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
7477 }()
7578
7679 // For simplicity's sake we'll use a buffered reader to read from the cat-file --batch
77- batchReader := bufio .NewReader (batchStdoutReader )
80+ batchReader := bufio .NewReaderSize (batchStdoutReader , 32 * 1024 )
7881
7982 return batchStdinWriter , batchReader , cancel
8083}
@@ -84,22 +87,31 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
8487// <sha> SP <type> SP <size> LF
8588// sha is a 40-byte SHA here, not a 20-byte one
8689func ReadBatchLine (rd * bufio.Reader ) (sha []byte , typ string , size int64 , err error ) {
87- sha , err = rd .ReadBytes ( ' ' )
90+ typ , err = rd .ReadString ( '\n ' )
8891 if err != nil {
8992 return
9093 }
91- sha = sha [:len (sha )- 1 ]
92-
93- typ , err = rd .ReadString ('\n' )
94- if err != nil {
94+ if len (typ ) == 1 {
95+ typ , err = rd .ReadString ('\n' )
96+ if err != nil {
97+ return
98+ }
99+ }
100+ idx := strings .IndexByte (typ , ' ' )
101+ if idx < 0 {
102+ log ("missing space typ: %s" , typ )
103+ err = ErrNotExist {ID : string (sha )}
95104 return
96105 }
106+ sha = []byte (typ [:idx ])
107+ typ = typ [idx + 1 :]
97108
98- idx : = strings .Index (typ , " " )
109+ idx = strings .IndexByte (typ , ' ' )
99110 if idx < 0 {
100111 err = ErrNotExist {ID : string (sha )}
101112 return
102113 }
114+
103115 sizeStr := typ [idx + 1 : len (typ )- 1 ]
104116 typ = typ [:idx ]
105117
@@ -130,7 +142,7 @@ headerLoop:
130142 }
131143
132144 // Discard the rest of the tag
133- discard := size - n
145+ discard := size - n + 1
134146 for discard > math .MaxInt32 {
135147 _ , err := rd .Discard (math .MaxInt32 )
136148 if err != nil {
@@ -200,85 +212,42 @@ func To40ByteSHA(sha, out []byte) []byte {
200212 return out
201213}
202214
203- // ParseTreeLineSkipMode reads an entry from a tree in a cat-file --batch stream
204- // This simply skips the mode - saving a substantial amount of time and carefully avoids allocations - except where fnameBuf is too small.
215+ // ParseTreeLine reads an entry from a tree in a cat-file --batch stream
216+ // This carefully avoids allocations - except where fnameBuf is too small.
205217// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
206218//
207219// Each line is composed of:
208220// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
209221//
210222// We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
211- func ParseTreeLineSkipMode (rd * bufio.Reader , fnameBuf , shaBuf []byte ) (fname , sha []byte , n int , err error ) {
223+ func ParseTreeLine (rd * bufio.Reader , modeBuf , fnameBuf , shaBuf []byte ) (mode , fname , sha []byte , n int , err error ) {
212224 var readBytes []byte
213- // Skip the Mode
214- readBytes , err = rd .ReadSlice (' ' ) // NB: DOES NOT ALLOCATE SIMPLY RETURNS SLICE WITHIN READER BUFFER
215- if err != nil {
216- return
217- }
218- n += len (readBytes )
219225
220- // Deal with the fname
226+ // Read the Mode & fname
221227 readBytes , err = rd .ReadSlice ('\x00' )
222- copy (fnameBuf , readBytes )
223- if len (fnameBuf ) > len (readBytes ) {
224- fnameBuf = fnameBuf [:len (readBytes )] // cut the buf the correct size
225- } else {
226- fnameBuf = append (fnameBuf , readBytes [len (fnameBuf ):]... ) // extend the buf and copy in the missing bits
227- }
228- for err == bufio .ErrBufferFull { // Then we need to read more
229- readBytes , err = rd .ReadSlice ('\x00' )
230- fnameBuf = append (fnameBuf , readBytes ... ) // there is little point attempting to avoid allocations here so just extend
231- }
232- n += len (fnameBuf )
233228 if err != nil {
234229 return
235230 }
236- fnameBuf = fnameBuf [:len (fnameBuf )- 1 ] // Drop the terminal NUL
237- fname = fnameBuf // set the returnable fname to the slice
238-
239- // Now deal with the 20-byte SHA
240- idx := 0
241- for idx < 20 {
242- read := 0
243- read , err = rd .Read (shaBuf [idx :20 ])
244- n += read
245- if err != nil {
246- return
247- }
248- idx += read
249- }
250- sha = shaBuf
251- return
252- }
253-
254- // ParseTreeLine reads an entry from a tree in a cat-file --batch stream
255- // This carefully avoids allocations - except where fnameBuf is too small.
256- // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
257- //
258- // Each line is composed of:
259- // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
260- //
261- // We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
262- func ParseTreeLine (rd * bufio.Reader , modeBuf , fnameBuf , shaBuf []byte ) (mode , fname , sha []byte , n int , err error ) {
263- var readBytes []byte
231+ idx := bytes .IndexByte (readBytes , ' ' )
232+ if idx < 0 {
233+ log ("missing space in readBytes ParseTreeLine: %s" , readBytes )
264234
265- // Read the Mode
266- readBytes , err = rd .ReadSlice (' ' )
267- if err != nil {
235+ err = & ErrNotExist {}
268236 return
269237 }
270- n += len (readBytes )
271- copy (modeBuf , readBytes )
272- if len (modeBuf ) > len (readBytes ) {
273- modeBuf = modeBuf [:len (readBytes )]
274- } else {
275- modeBuf = append (modeBuf , readBytes [len (modeBuf ):]... )
276238
239+ n += idx + 1
240+ copy (modeBuf , readBytes [:idx ])
241+ if len (modeBuf ) >= idx {
242+ modeBuf = modeBuf [:idx ]
243+ } else {
244+ modeBuf = append (modeBuf , readBytes [len (modeBuf ):idx ]... )
277245 }
278- mode = modeBuf [:len (modeBuf )- 1 ] // Drop the SP
246+ mode = modeBuf
247+
248+ readBytes = readBytes [idx + 1 :]
279249
280250 // Deal with the fname
281- readBytes , err = rd .ReadSlice ('\x00' )
282251 copy (fnameBuf , readBytes )
283252 if len (fnameBuf ) > len (readBytes ) {
284253 fnameBuf = fnameBuf [:len (readBytes )]
@@ -297,7 +266,7 @@ func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fn
297266 fname = fnameBuf
298267
299268 // Deal with the 20-byte SHA
300- idx : = 0
269+ idx = 0
301270 for idx < 20 {
302271 read := 0
303272 read , err = rd .Read (shaBuf [idx :20 ])
0 commit comments