@@ -22,16 +22,16 @@ use self::states::{Rawtext, Rcdata, ScriptData, ScriptDataEscaped};
2222use self :: char_ref:: { CharRef , CharRefTokenizer } ;
2323
2424use crate :: util:: str:: lower_ascii_letter;
25-
2625use log:: { debug, trace} ;
2726use mac:: format_if;
28- use markup5ever:: { namespace_url, ns, small_char_set} ;
27+ use markup5ever:: buffer_queue:: BufferQueue ;
28+ use markup5ever:: { namespace_url, ns, small_char_set, InputSink , InputSinkResult } ;
2929use std:: borrow:: Cow :: { self , Borrowed } ;
3030use std:: cell:: { Cell , RefCell , RefMut } ;
3131use std:: collections:: BTreeMap ;
32- use std:: mem;
32+ use std:: { iter , mem} ;
3333
34- pub use crate :: buffer_queue:: { BufferQueue , FromSet , NotFromSet , SetResult } ;
34+ pub use crate :: buffer_queue:: { FromSet , NotFromSet , SetResult } ;
3535use crate :: tendril:: StrTendril ;
3636use crate :: { Attribute , LocalName , QualName , SmallCharSet } ;
3737
@@ -43,13 +43,17 @@ pub enum ProcessResult<Handle> {
4343 Continue ,
4444 Suspend ,
4545 Script ( Handle ) ,
46+ #[ cfg( feature = "encoding" ) ]
47+ MaybeChangeEncodingAndStartOver ( & ' static encoding_rs:: Encoding ) ,
4648}
4749
/// Outcome reported to the caller after the tokenizer has been fed input.
///
/// The value is `#[must_use]`: every variant other than `Done` carries data
/// the caller is expected to act on.
#[derive(Debug)]
#[must_use]
pub enum TokenizerResult<Handle> {
    /// Nothing further to report for this call.
    Done,
    /// A script handle surfaced by the sink; it is passed through to the
    /// caller unchanged.
    Script(Handle),
    /// The sink asked to possibly switch to the given encoding and start
    /// over. Only present when the `encoding` feature is enabled.
    #[cfg(feature = "encoding")]
    MaybeChangeEncodingAndStartOver(&'static encoding_rs::Encoding),
}
5458
5559fn option_push ( opt_str : & mut Option < StrTendril > , c : char ) {
@@ -364,6 +368,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
364368 ProcessResult :: Continue => ( ) ,
365369 ProcessResult :: Suspend => break ,
366370 ProcessResult :: Script ( node) => return TokenizerResult :: Script ( node) ,
371+ #[ cfg( feature = "encoding" ) ]
372+ ProcessResult :: MaybeChangeEncodingAndStartOver ( encoding) => {
373+ return TokenizerResult :: MaybeChangeEncodingAndStartOver ( encoding)
374+ } ,
367375 }
368376 }
369377 } else {
@@ -372,6 +380,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
372380 ProcessResult :: Continue => ( ) ,
373381 ProcessResult :: Suspend => break ,
374382 ProcessResult :: Script ( node) => return TokenizerResult :: Script ( node) ,
383+ #[ cfg( feature = "encoding" ) ]
384+ ProcessResult :: MaybeChangeEncodingAndStartOver ( encoding) => {
385+ return TokenizerResult :: MaybeChangeEncodingAndStartOver ( encoding)
386+ } ,
375387 }
376388 }
377389 }
@@ -452,6 +464,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
452464 self . state . set ( states:: RawData ( kind) ) ;
453465 ProcessResult :: Continue
454466 } ,
467+ #[ cfg( feature = "encoding" ) ]
468+ TokenSinkResult :: MaybeChangeEncodingAndStartOver ( encoding) => {
469+ ProcessResult :: MaybeChangeEncodingAndStartOver ( encoding)
470+ } ,
455471 }
456472 }
457473
@@ -1455,6 +1471,8 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
14551471 ProcessResult :: Continue => ( ) ,
14561472 ProcessResult :: Suspend => break ,
14571473 ProcessResult :: Script ( _) => unreachable ! ( ) ,
1474+ #[ cfg( feature = "encoding" ) ]
1475+ ProcessResult :: MaybeChangeEncodingAndStartOver ( _) => unreachable ! ( ) ,
14581476 }
14591477 }
14601478
@@ -1582,13 +1600,36 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
15821600 }
15831601}
15841602
1603+ impl < Sink > InputSink for Tokenizer < Sink >
1604+ where
1605+ Sink : TokenSink ,
1606+ {
1607+ type Handle = Sink :: Handle ;
1608+
1609+ fn feed ( & self , input : & BufferQueue ) -> impl Iterator < Item = InputSinkResult < Self :: Handle > > {
1610+ iter:: from_fn ( || self . feed ( input) . into ( ) )
1611+ }
1612+ }
1613+
1614+ impl < Handle > From < TokenizerResult < Handle > > for Option < InputSinkResult < Handle > > {
1615+ fn from ( value : TokenizerResult < Handle > ) -> Self {
1616+ match value {
1617+ TokenizerResult :: Script ( handle) => Some ( InputSinkResult :: HandleScript ( handle) ) ,
1618+ TokenizerResult :: MaybeChangeEncodingAndStartOver ( encoding) => {
1619+ Some ( InputSinkResult :: MaybeStartOverWithEncoding ( encoding) )
1620+ } ,
1621+ TokenizerResult :: Done => None ,
1622+ }
1623+ }
1624+ }
1625+
15851626#[ cfg( test) ]
15861627#[ allow( non_snake_case) ]
15871628mod test {
15881629 use super :: option_push; // private items
1589- use crate :: tendril:: { SliceExt , StrTendril } ;
1590-
15911630 use super :: { TokenSink , TokenSinkResult , Tokenizer , TokenizerOpts } ;
1631+ use crate :: tendril:: { SliceExt , StrTendril } ;
1632+ use crate :: LocalName ;
15921633
15931634 use super :: interface:: { CharacterTokens , EOFToken , NullCharacterToken , ParseError } ;
15941635 use super :: interface:: { EndTag , StartTag , Tag , TagKind } ;
@@ -1597,8 +1638,6 @@ mod test {
15971638 use markup5ever:: buffer_queue:: BufferQueue ;
15981639 use std:: cell:: RefCell ;
15991640
1600- use crate :: LocalName ;
1601-
16021641 // LinesMatch implements the TokenSink trait. It is used for testing to see
16031642 // if current_line is being updated when process_token is called. The lines
16041643 // vector is a collection of the line numbers that each token is on.
0 commit comments