@@ -44,186 +44,133 @@ function StringDecoder(encoding) {
4444 var nb ;
4545 switch ( this . encoding ) {
4646 case 'utf16le' :
47- this . text = utf16Text ;
48- this . end = utf16End ;
47+ this . complete = utf16Complete ;
48+ this . flush = simpleFlush ;
4949 // fall through
5050 case 'utf8' :
5151 nb = 4 ;
5252 break ;
5353 case 'base64' :
54- this . text = base64Text ;
55- this . end = base64End ;
54+ this . complete = base64Complete ;
55+ this . flush = simpleFlush ;
5656 nb = 3 ;
5757 break ;
5858 default :
5959 this . write = simpleWrite ;
6060 this . end = simpleEnd ;
6161 return ;
6262 }
63- this . lastNeed = 0 ;
64- this . lastTotal = 0 ;
63+ this . partial = 0 ;
6564 this . lastChar = Buffer . allocUnsafe ( nb ) ;
6665}
6766
6867StringDecoder . prototype . write = function ( buf ) {
6968 if ( buf . length === 0 )
7069 return '' ;
71- var r ;
72- var i ;
73- if ( this . lastNeed ) {
74- r = this . fillLast ( buf ) ;
75- if ( r === undefined )
76- return '' ;
77- i = this . lastNeed ;
78- this . lastNeed = 0 ;
79- } else {
80- i = 0 ;
81- }
82- if ( i < buf . length )
83- return ( r ? r + this . text ( buf , i ) : this . text ( buf , i ) ) ;
84- return r || '' ;
70+ const partial = this . partial ;
71+ if ( ! partial )
72+ return this . text ( buf , 0 , buf . length ) ;
73+
74+ // We have incomplete characters in partial many bytes from last run.
75+ // Copy bytes from buf to fill lastChar (if there is enough input).
76+ const newHeadLen = Math . min ( buf . length , this . lastChar . length - partial ) ;
77+ const totalHeadLen = newHeadLen + partial ;
78+ buf . copy ( this . lastChar , partial , 0 , newHeadLen ) ;
79+ // Now we have totalHeadLen bytes of input in lastChar, try to convert that.
80+ let r = this . text ( this . lastChar , 0 , totalHeadLen ) ;
81+ if ( this . partial <= newHeadLen ) // consumed at least all the old head
82+ r += this . text ( buf , newHeadLen - this . partial , buf . length ) ;
83+ return r ;
8584} ;
8685
87- StringDecoder . prototype . end = utf8End ;
88-
8986// Returns only complete characters in a Buffer
90- StringDecoder . prototype . text = utf8Text ;
87+ StringDecoder . prototype . text = function ( buf , start , end ) {
88+ if ( start === end )
89+ return '' ;
90+ const complete = this . complete ( buf , start , end ) ;
91+ this . partial = end - complete ;
92+ if ( this . partial && buf !== this . lastChar )
93+ buf . copy ( this . lastChar , 0 , complete , end ) ;
94+ if ( start === complete )
95+ return '' ;
96+ return buf . toString ( this . encoding , start , complete ) ;
97+ } ;
9198
92- // Attempts to complete a partial character using bytes from a Buffer
93- StringDecoder . prototype . fillLast = function ( buf ) {
94- if ( this . lastNeed <= buf . length ) {
95- buf . copy ( this . lastChar , this . lastTotal - this . lastNeed , 0 , this . lastNeed ) ;
96- return this . lastChar . toString ( this . encoding , 0 , this . lastTotal ) ;
99+ // Returns a suitable representation of incomplete characters as well
100+ StringDecoder . prototype . end = function ( buf ) {
101+ let r = ( buf && buf . length ? this . write ( buf ) : '' ) ;
102+ if ( this . partial ) {
103+ r += this . flush ( ) ;
104+ this . partial = 0 ;
97105 }
98- buf . copy ( this . lastChar , this . lastTotal - this . lastNeed , 0 , buf . length ) ;
99- this . lastNeed -= buf . length ;
106+ return r ;
100107} ;
101108
102- // Checks the type of a UTF-8 byte, whether it's ASCII, a leading byte, or a
103- // continuation byte.
104- function utf8CheckByte ( byte ) {
105- if ( byte <= 0x7F )
106- return 0 ;
107- else if ( byte >> 5 === 0x06 )
108- return 2 ;
109- else if ( byte >> 4 === 0x0E )
110- return 3 ;
111- else if ( byte >> 3 === 0x1E )
112- return 4 ;
113- return - 1 ;
114- }
109+ // Given (buf, start, end), determine the maximal n <= end such that
110+ // buf.slice(start, n) contains only complete characters
111+ StringDecoder . prototype . complete = utf8Complete ;
112+
113+ // Returns a string representation of the this.partial bytes in
114+ // this.lastChar which represent an incomplete character
115+ StringDecoder . prototype . flush = utf8Flush ;
115116
116117// Checks at most the last 3 bytes of a Buffer for an incomplete UTF-8
117- // character, returning the total number of bytes needed to complete the partial
118- // character (if applicable).
119- function utf8CheckIncomplete ( self , buf , i ) {
120- var j = buf . length - 1 ;
121- if ( j < i )
122- return 0 ;
123- var nb = utf8CheckByte ( buf [ j -- ] ) ;
124- if ( nb >= 0 ) {
125- if ( nb > 0 )
126- self . lastNeed = nb + 1 - ( buf . length - j ) ;
127- return nb ;
128- }
129- if ( j < i )
130- return 0 ;
131- nb = utf8CheckByte ( buf [ j -- ] ) ;
132- if ( nb >= 0 ) {
133- if ( nb > 0 )
134- self . lastNeed = nb + 1 - ( buf . length - j ) ;
135- return nb ;
136- }
137- if ( j < i )
138- return 0 ;
139- nb = utf8CheckByte ( buf [ j -- ] ) ;
140- if ( nb >= 0 ) {
141- if ( nb > 0 )
142- self . lastNeed = nb + 1 - ( buf . length - j ) ;
143- return nb ;
118+ // character, returning the position after the last complete character.
119+ function utf8Complete ( buf , start , end ) {
120+ if ( start > end - 3 )
121+ start = end - 3 ;
122+ for ( let i = end - 1 ; i >= start ; -- i ) {
123+ const byte = buf [ i ] ;
124+ let numBytes ;
125+ if ( byte >> 6 === 0x02 )
126+ continue ; // continuation byte
127+ else if ( byte >> 5 === 0x06 )
128+ numBytes = 2 ;
129+ else if ( byte >> 4 === 0x0E )
130+ numBytes = 3 ;
131+ else if ( byte >> 3 === 0x1E )
132+ numBytes = 4 ;
133+ else
134+ numBytes = 1 ; // ASCII or invalid
135+ if ( i + numBytes > end ) // incomplete
136+ return i ; // continue next run at leading byte
137+ // Have complete sequence, possibly followed by garbage continuation.
138+ return end ;
144139 }
145- return 0 ;
146- }
147-
148- // Returns all complete UTF-8 characters in a Buffer. If the Buffer ended on a
149- // partial character, the character's bytes are buffered until the required
150- // number of bytes are available.
151- function utf8Text ( buf , i ) {
152- const total = utf8CheckIncomplete ( this , buf , i ) ;
153- if ( ! this . lastNeed )
154- return buf . toString ( 'utf8' , i ) ;
155- this . lastTotal = total ;
156- const end = buf . length - ( total - this . lastNeed ) ;
157- buf . copy ( this . lastChar , 0 , end ) ;
158- return buf . toString ( 'utf8' , i , end ) ;
140+ // Ends in valid 4-byte sequence or invalid continuation characters.
141+ // Either way the input is complete, so convert it as is.
142+ return end ;
159143}
160144
161145// For UTF-8, a replacement character for each buffered byte of a (partial)
162146// character needs to be added to the output.
163- function utf8End ( buf ) {
164- const r = ( buf && buf . length ? this . write ( buf ) : '' ) ;
165- if ( this . lastNeed )
166- return r + '\ufffd' . repeat ( this . lastTotal - this . lastNeed ) ;
167- return r ;
147+ function utf8Flush ( ) {
148+ return '\ufffd' . repeat ( this . partial ) ;
168149}
169150
170151// UTF-16LE typically needs two bytes per character, but even if we have an even
171152// number of bytes available, we need to check if we end on a leading/high
172153// surrogate. In that case, we need to wait for the next two bytes in order to
173154// decode the last character properly.
174- function utf16Text ( buf , i ) {
175- if ( ( buf . length - i ) % 2 === 0 ) {
176- const r = buf . toString ( 'utf16le' , i ) ;
177- if ( r ) {
178- const c = r . charCodeAt ( r . length - 1 ) ;
179- if ( c >= 0xD800 && c <= 0xDBFF ) {
180- this . lastNeed = 2 ;
181- this . lastTotal = 4 ;
182- this . lastChar [ 0 ] = buf [ buf . length - 2 ] ;
183- this . lastChar [ 1 ] = buf [ buf . length - 1 ] ;
184- return r . slice ( 0 , - 1 ) ;
185- }
186- }
187- return r ;
155+ function utf16Complete ( buf , start , end ) {
156+ if ( ( end - start ) & 1 )
157+ -- end ;
158+ if ( end > start ) {
159+ const byte = buf [ end - 1 ] ;
160+ if ( byte >= 0xD8 && byte <= 0xDB )
161+ return end - 2 ;
188162 }
189- this . lastNeed = 1 ;
190- this . lastTotal = 2 ;
191- this . lastChar [ 0 ] = buf [ buf . length - 1 ] ;
192- return buf . toString ( 'utf16le' , i , buf . length - 1 ) ;
163+ return end ;
193164}
194165
195- // For UTF-16LE we do not explicitly append special replacement characters if we
196- // end on a partial character, we simply let v8 handle that.
197- function utf16End ( buf ) {
198- const r = ( buf && buf . length ? this . write ( buf ) : '' ) ;
199- if ( this . lastNeed ) {
200- const end = this . lastTotal - this . lastNeed ;
201- return r + this . lastChar . toString ( 'utf16le' , 0 , end ) ;
202- }
203- return r ;
166+ function base64Complete ( buf , start , end ) {
167+ return end - ( end - start ) % 3 ;
204168}
205169
206- function base64Text ( buf , i ) {
207- const n = ( buf . length - i ) % 3 ;
208- if ( n === 0 )
209- return buf . toString ( 'base64' , i ) ;
210- this . lastNeed = 3 - n ;
211- this . lastTotal = 3 ;
212- if ( n === 1 ) {
213- this . lastChar [ 0 ] = buf [ buf . length - 1 ] ;
214- } else {
215- this . lastChar [ 0 ] = buf [ buf . length - 2 ] ;
216- this . lastChar [ 1 ] = buf [ buf . length - 1 ] ;
217- }
218- return buf . toString ( 'base64' , i , buf . length - n ) ;
219- }
220-
221-
222- function base64End ( buf ) {
223- const r = ( buf && buf . length ? this . write ( buf ) : '' ) ;
224- if ( this . lastNeed )
225- return r + this . lastChar . toString ( 'base64' , 0 , 3 - this . lastNeed ) ;
226- return r ;
170+ // For UTF-16LE and Base64 we do not explicitly append special replacement
171+ // characters if we end on a partial character, we simply let v8 handle that.
172+ function simpleFlush ( ) {
173+ return this . lastChar . toString ( this . encoding , 0 , this . partial ) ;
227174}
228175
229176// Pass bytes on through for single-byte encodings (e.g. ascii, latin1, hex)
0 commit comments