2222'use strict' ;
2323
2424const { Buffer } = require ( 'buffer' ) ;
25+ const {
26+ kIncompleteCharactersStart,
27+ kIncompleteCharactersEnd,
28+ kMissingBytes,
29+ kBufferedBytes,
30+ kEncodingField,
31+ kSize,
32+ decode,
33+ flush,
34+ encodings
35+ } = internalBinding ( 'string_decoder' ) ;
2536const internalUtil = require ( 'internal/util' ) ;
2637const errors = require ( 'internal/errors' ) ;
2738const isEncoding = Buffer [ internalUtil . kIsEncodingSymbol ] ;
2839
40+ const kNativeDecoder = Symbol ( 'kNativeDecoder' ) ;
41+
2942// Do not cache `Buffer.isEncoding` when checking encoding names as some
3043// modules monkey-patch it to support additional encodings
3144function normalizeEncoding ( enc ) {
@@ -36,258 +49,54 @@ function normalizeEncoding(enc) {
3649 return nenc || enc ;
3750}
3851
// Lookup table from encoding name to its index in the `encodings` list
// exported by the native `string_decoder` binding. The StringDecoder
// constructor writes this index into the native state buffer.
const encodingsMap = {};
// `let` keeps the counter scoped to the loop instead of leaving a stray
// module-level `i` behind, as `var` would.
for (let i = 0; i < encodings.length; ++i)
  encodingsMap[encodings[i]] = i;
55+
3956// StringDecoder provides an interface for efficiently splitting a series of
4057// buffers into a series of JS strings without breaking apart multi-byte
4158// characters.
42- exports . StringDecoder = StringDecoder ;
43- function StringDecoder ( encoding ) {
44- this . encoding = normalizeEncoding ( encoding ) ;
45- var nb ;
46- switch ( this . encoding ) {
47- case 'utf16le' :
48- this . text = utf16Text ;
49- this . end = utf16End ;
50- nb = 4 ;
51- break ;
52- case 'utf8' :
53- this . fillLast = utf8FillLast ;
54- nb = 4 ;
55- break ;
56- case 'base64' :
57- this . text = base64Text ;
58- this . end = base64End ;
59- nb = 3 ;
60- break ;
61- default :
62- this . write = simpleWrite ;
63- this . end = simpleEnd ;
64- return ;
65- }
66- this . lastNeed = 0 ;
67- this . lastTotal = 0 ;
68- this . lastChar = Buffer . allocUnsafe ( nb ) ;
69- }
70-
71- StringDecoder . prototype . write = function ( buf ) {
72- if ( buf . length === 0 )
73- return '' ;
74- var r ;
75- var i ;
76- if ( this . lastNeed ) {
77- r = this . fillLast ( buf ) ;
78- if ( r === undefined )
79- return '' ;
80- i = this . lastNeed ;
81- this . lastNeed = 0 ;
82- } else {
83- i = 0 ;
84- }
85- if ( i < buf . length )
86- return ( r ? r + this . text ( buf , i ) : this . text ( buf , i ) ) ;
87- return r || '' ;
88- } ;
89-
90- StringDecoder . prototype . end = utf8End ;
91-
92- // Returns only complete characters in a Buffer
93- StringDecoder . prototype . text = utf8Text ;
94-
95- // Attempts to complete a partial non-UTF-8 character using bytes from a Buffer
96- StringDecoder . prototype . fillLast = function ( buf ) {
97- if ( this . lastNeed <= buf . length ) {
98- buf . copy ( this . lastChar , this . lastTotal - this . lastNeed , 0 , this . lastNeed ) ;
99- return this . lastChar . toString ( this . encoding , 0 , this . lastTotal ) ;
100- }
101- buf . copy ( this . lastChar , this . lastTotal - this . lastNeed , 0 , buf . length ) ;
102- this . lastNeed -= buf . length ;
103- } ;
104-
105- // Checks the type of a UTF-8 byte, whether it's ASCII, a leading byte, or a
106- // continuation byte. If an invalid byte is detected, -2 is returned.
107- function utf8CheckByte ( byte ) {
108- if ( byte <= 0x7F )
109- return 0 ;
110- else if ( byte >> 5 === 0x06 )
111- return 2 ;
112- else if ( byte >> 4 === 0x0E )
113- return 3 ;
114- else if ( byte >> 3 === 0x1E )
115- return 4 ;
116- return ( byte >> 6 === 0x02 ? - 1 : - 2 ) ;
117- }
118-
119- // Checks at most 3 bytes at the end of a Buffer in order to detect an
120- // incomplete multi-byte UTF-8 character. The total number of bytes (2, 3, or 4)
121- // needed to complete the UTF-8 character (if applicable) are returned.
122- function utf8CheckIncomplete ( self , buf , i ) {
123- var j = buf . length - 1 ;
124- if ( j < i )
125- return 0 ;
126- var nb = utf8CheckByte ( buf [ j ] ) ;
127- if ( nb >= 0 ) {
128- if ( nb > 0 )
129- self . lastNeed = nb - 1 ;
130- return nb ;
131- }
132- if ( -- j < i || nb === - 2 )
133- return 0 ;
134- nb = utf8CheckByte ( buf [ j ] ) ;
135- if ( nb >= 0 ) {
136- if ( nb > 0 )
137- self . lastNeed = nb - 2 ;
138- return nb ;
139- }
140- if ( -- j < i || nb === - 2 )
141- return 0 ;
142- nb = utf8CheckByte ( buf [ j ] ) ;
143- if ( nb >= 0 ) {
144- if ( nb > 0 ) {
145- if ( nb === 2 )
146- nb = 0 ;
147- else
148- self . lastNeed = nb - 3 ;
149- }
150- return nb ;
151- }
152- return 0 ;
153- }
154-
155- // Validates as many continuation bytes for a multi-byte UTF-8 character as
156- // needed or are available. If we see a non-continuation byte where we expect
157- // one, we "replace" the validated continuation bytes we've seen so far with
158- // a single UTF-8 replacement character ('\ufffd'), to match v8's UTF-8 decoding
159- // behavior. The continuation byte check is included three times in the case
160- // where all of the continuation bytes for a character exist in the same buffer.
161- // It is also done this way as a slight performance increase instead of using a
162- // loop.
163- function utf8CheckExtraBytes ( self , buf , p ) {
164- if ( ( buf [ 0 ] & 0xC0 ) !== 0x80 ) {
165- self . lastNeed = 0 ;
166- return '\ufffd' ;
167- }
168- if ( self . lastNeed > 1 && buf . length > 1 ) {
169- if ( ( buf [ 1 ] & 0xC0 ) !== 0x80 ) {
170- self . lastNeed = 1 ;
171- return '\ufffd' ;
172- }
173- if ( self . lastNeed > 2 && buf . length > 2 ) {
174- if ( ( buf [ 2 ] & 0xC0 ) !== 0x80 ) {
175- self . lastNeed = 2 ;
176- return '\ufffd' ;
177- }
178- }
59+ class StringDecoder {
60+ constructor ( encoding ) {
61+ this . encoding = normalizeEncoding ( encoding ) ;
62+ this [ kNativeDecoder ] = Buffer . alloc ( kSize ) ;
63+ this [ kNativeDecoder ] [ kEncodingField ] = encodingsMap [ this . encoding ] ;
17964 }
180- }
18165
182- // Attempts to complete a multi-byte UTF-8 character using bytes from a Buffer.
183- function utf8FillLast ( buf ) {
184- const p = this . lastTotal - this . lastNeed ;
185- var r = utf8CheckExtraBytes ( this , buf , p ) ;
186- if ( r !== undefined )
187- return r ;
188- if ( this . lastNeed <= buf . length ) {
189- buf . copy ( this . lastChar , p , 0 , this . lastNeed ) ;
190- return this . lastChar . toString ( this . encoding , 0 , this . lastTotal ) ;
66+ write ( buf ) {
67+ if ( typeof buf === 'string' )
68+ return buf ;
69+ if ( ! ArrayBuffer . isView ( buf ) )
70+ throw new errors . TypeError ( 'ERR_INVALID_ARG_TYPE' , 'buf' ,
71+ [ 'Buffer' , 'Uint8Array' , 'ArrayBufferView' ] ) ;
72+ return decode ( this [ kNativeDecoder ] , buf ) ;
19173 }
192- buf . copy ( this . lastChar , p , 0 , buf . length ) ;
193- this . lastNeed -= buf . length ;
194- }
19574
196- // Returns all complete UTF-8 characters in a Buffer. If the Buffer ended on a
197- // partial character, the character's bytes are buffered until the required
198- // number of bytes are available.
199- function utf8Text ( buf , i ) {
200- const total = utf8CheckIncomplete ( this , buf , i ) ;
201- if ( ! this . lastNeed )
202- return buf . toString ( 'utf8' , i ) ;
203- this . lastTotal = total ;
204- const end = buf . length - ( total - this . lastNeed ) ;
205- buf . copy ( this . lastChar , 0 , end ) ;
206- return buf . toString ( 'utf8' , i , end ) ;
207- }
208-
209- // For UTF-8, a replacement character is added when ending on a partial
210- // character.
211- function utf8End ( buf ) {
212- const r = ( buf && buf . length ? this . write ( buf ) : '' ) ;
213- if ( this . lastNeed ) {
214- this . lastNeed = 0 ;
215- this . lastTotal = 0 ;
216- return r + '\ufffd' ;
75+ end ( buf ) {
76+ let ret = '' ;
77+ if ( buf !== undefined )
78+ ret = this . write ( buf ) ;
79+ if ( this [ kNativeDecoder ] [ kBufferedBytes ] > 0 )
80+ ret += flush ( this [ kNativeDecoder ] ) ;
81+ return ret ;
21782 }
218- return r ;
219- }
22083
221- // UTF-16LE typically needs two bytes per character, but even if we have an even
222- // number of bytes available, we need to check if we end on a leading/high
223- // surrogate. In that case, we need to wait for the next two bytes in order to
224- // decode the last character properly.
225- function utf16Text ( buf , i ) {
226- if ( ( buf . length - i ) % 2 === 0 ) {
227- const r = buf . toString ( 'utf16le' , i ) ;
228- if ( r ) {
229- const c = r . charCodeAt ( r . length - 1 ) ;
230- if ( c >= 0xD800 && c <= 0xDBFF ) {
231- this . lastNeed = 2 ;
232- this . lastTotal = 4 ;
233- this . lastChar [ 0 ] = buf [ buf . length - 2 ] ;
234- this . lastChar [ 1 ] = buf [ buf . length - 1 ] ;
235- return r . slice ( 0 , - 1 ) ;
236- }
237- }
238- return r ;
239- }
240- this . lastNeed = 1 ;
241- this . lastTotal = 2 ;
242- this . lastChar [ 0 ] = buf [ buf . length - 1 ] ;
243- return buf . toString ( 'utf16le' , i , buf . length - 1 ) ;
244- }
84+ /* Everything below this line is undocumented legacy stuff. */
24585
246- // For UTF-16LE we do not explicitly append special replacement characters if we
247- // end on a partial character, we simply let v8 handle that.
248- function utf16End ( buf ) {
249- const r = ( buf && buf . length ? this . write ( buf ) : '' ) ;
250- if ( this . lastNeed ) {
251- const end = this . lastTotal - this . lastNeed ;
252- this . lastNeed = 0 ;
253- this . lastTotal = 0 ;
254- return r + this . lastChar . toString ( 'utf16le' , 0 , end ) ;
86+ text ( buf , offset ) {
87+ this [ kNativeDecoder ] [ kMissingBytes ] = 0 ;
88+ this [ kNativeDecoder ] [ kBufferedBytes ] = 0 ;
89+ return this . write ( buf . slice ( offset ) ) ;
25590 }
256- return r ;
257- }
25891
259- function base64Text ( buf , i ) {
260- const n = ( buf . length - i ) % 3 ;
261- if ( n === 0 )
262- return buf . toString ( 'base64' , i ) ;
263- this . lastNeed = 3 - n ;
264- this . lastTotal = 3 ;
265- if ( n === 1 ) {
266- this . lastChar [ 0 ] = buf [ buf . length - 1 ] ;
267- } else {
268- this . lastChar [ 0 ] = buf [ buf . length - 2 ] ;
269- this . lastChar [ 1 ] = buf [ buf . length - 1 ] ;
92+ get lastTotal ( ) {
93+ return this [ kNativeDecoder ] [ kBufferedBytes ] + this . lastNeed ;
27094 }
271- return buf . toString ( 'base64' , i , buf . length - n ) ;
272- }
273-
27495
275- function base64End ( buf ) {
276- const r = ( buf && buf . length ? this . write ( buf ) : '' ) ;
277- if ( this . lastNeed ) {
278- const end = 3 - this . lastNeed ;
279- this . lastNeed = 0 ;
280- this . lastTotal = 0 ;
281- return r + this . lastChar . toString ( 'base64' , 0 , end ) ;
96+ get lastChar ( ) {
97+ return this [ kNativeDecoder ] . subarray ( kIncompleteCharactersStart ,
98+ kIncompleteCharactersEnd ) ;
28299 }
283- return r ;
284100}
285101
286- // Pass bytes on through for single-byte encodings (e.g. ascii, latin1, hex)
287- function simpleWrite ( buf ) {
288- return buf . toString ( this . encoding ) ;
289- }
290-
291- function simpleEnd ( buf ) {
292- return ( buf && buf . length ? this . write ( buf ) : '' ) ;
293- }
102+ exports . StringDecoder = StringDecoder ;
0 commit comments