diff --git a/crates/swc_common/src/input.rs b/crates/swc_common/src/input.rs index 6cd4d8e35e83..f46a436df776 100644 --- a/crates/swc_common/src/input.rs +++ b/crates/swc_common/src/input.rs @@ -10,8 +10,8 @@ pub type SourceFileInput<'a> = StringInput<'a>; #[derive(Clone)] pub struct StringInput<'a> { last_pos: BytePos, - /// Current cursor - iter: str::Chars<'a>, + /// Remaining input as str - we slice this as we consume bytes + remaining: &'a str, orig: &'a str, /// Original start position. orig_start: BytePos, @@ -33,7 +33,7 @@ impl<'a> StringInput<'a> { StringInput { last_pos: start, orig: src, - iter: src.chars(), + remaining: src, orig_start: start, orig_end: end, } @@ -41,7 +41,7 @@ impl<'a> StringInput<'a> { #[inline(always)] pub fn as_str(&self) -> &str { - self.iter.as_str() + self.remaining } #[inline(always)] @@ -68,21 +68,22 @@ impl<'a> StringInput<'a> { let ret = unsafe { s.get_unchecked(start_idx..end_idx) }; - self.iter = unsafe { s.get_unchecked(end_idx..) }.chars(); + self.remaining = unsafe { s.get_unchecked(end_idx..) }; ret } #[inline] pub fn bump_bytes(&mut self, n: usize) { - let s = self.iter.as_str(); - self.iter = unsafe { s.get_unchecked(n..) }.chars(); + debug_assert!(n <= self.remaining.len()); + self.remaining = unsafe { self.remaining.get_unchecked(n..) }; self.last_pos.0 += n as u32; } #[inline] pub fn bump_one(&mut self) { - if self.iter.next().is_some() { + if !self.remaining.is_empty() { + self.remaining = unsafe { self.remaining.get_unchecked(1..) }; self.last_pos.0 += 1; } else { unsafe { @@ -114,41 +115,56 @@ impl<'a> From<&'a SourceFile> for StringInput<'a> { impl<'a> Input<'a> for StringInput<'a> { #[inline] - fn cur(&self) -> Option { - self.iter.clone().next() + fn cur(&self) -> Option { + self.remaining.as_bytes().first().copied() } #[inline] - fn peek(&self) -> Option { - let mut iter = self.iter.clone(); - // https://github.com/rust-lang/rust/blob/1.86.0/compiler/rustc_lexer/src/cursor.rs#L56 say `next` is faster. 
- iter.next(); - iter.next() + fn peek(&self) -> Option { + self.remaining.as_bytes().get(1).copied() } #[inline] - fn peek_ahead(&self) -> Option { - let mut iter = self.iter.clone(); - // https://github.com/rust-lang/rust/blob/1.86.0/compiler/rustc_lexer/src/cursor.rs#L56 say `next` is faster - iter.next(); - iter.next(); - iter.next() + fn peek_ahead(&self) -> Option { + self.remaining.as_bytes().get(2).copied() } #[inline] unsafe fn bump(&mut self) { - if let Some(c) = self.iter.next() { - self.last_pos = self.last_pos + BytePos((c.len_utf8()) as u32); - } else { + let bytes = self.remaining.as_bytes(); + if bytes.is_empty() { unsafe { debug_unreachable!("bump should not be called when cur() == None"); } } + + let first_byte = unsafe { *bytes.get_unchecked(0) }; + + // Calculate the number of bytes in this UTF-8 character + let len = if first_byte < 0x80 { + 1 // ASCII + } else if first_byte < 0xe0 { + 2 // 2-byte UTF-8 + } else if first_byte < 0xf0 { + 3 // 3-byte UTF-8 + } else { + 4 // 4-byte UTF-8 + }; + + self.remaining = unsafe { self.remaining.get_unchecked(len..) }; + self.last_pos = self.last_pos + BytePos(len as u32); + } + + #[inline] + fn bump_bytes(&mut self, n: usize) { + debug_assert!(n <= self.remaining.len()); + self.remaining = unsafe { self.remaining.get_unchecked(n..) }; + self.last_pos.0 += n as u32; } #[inline] fn cur_as_ascii(&self) -> Option { - let first_byte = *self.as_str().as_bytes().first()?; + let first_byte = *self.remaining.as_bytes().first()?; if first_byte <= 0x7f { Some(first_byte) } else { @@ -156,6 +172,11 @@ impl<'a> Input<'a> for StringInput<'a> { } } + #[inline] + fn cur_as_char(&self) -> Option { + self.remaining.chars().next() + } + #[inline] fn is_at_start(&self) -> bool { self.orig_start == self.last_pos @@ -184,7 +205,7 @@ impl<'a> Input<'a> for StringInput<'a> { let ret = unsafe { s.get_unchecked(start_idx..end_idx) }; - self.iter = unsafe { s.get_unchecked(end_idx..) 
}.chars(); + self.remaining = unsafe { s.get_unchecked(end_idx..) }; self.last_pos = end; ret @@ -197,7 +218,7 @@ impl<'a> Input<'a> for StringInput<'a> { { let last = { let mut last = 0; - for c in self.iter.clone() { + for c in self.remaining.chars() { if pred(c) { last += c.len_utf8(); } else { @@ -207,12 +228,11 @@ impl<'a> Input<'a> for StringInput<'a> { last }; - let s = self.iter.as_str(); - debug_assert!(last <= s.len()); - let ret = unsafe { s.get_unchecked(..last) }; + debug_assert!(last <= self.remaining.len()); + let ret = unsafe { self.remaining.get_unchecked(..last) }; self.last_pos = self.last_pos + BytePos(last as _); - self.iter = unsafe { s.get_unchecked(last..) }.chars(); + self.remaining = unsafe { self.remaining.get_unchecked(last..) }; ret } @@ -228,15 +248,13 @@ impl<'a> Input<'a> for StringInput<'a> { let idx = (to - self.orig_start).0 as usize; debug_assert!(idx <= orig.len()); - let s = unsafe { orig.get_unchecked(idx..) }; - self.iter = s.chars(); + self.remaining = unsafe { orig.get_unchecked(idx..) }; self.last_pos = to; } #[inline] fn is_byte(&self, c: u8) -> bool { - self.iter - .as_str() + self.remaining .as_bytes() .first() .map(|b| *b == c) @@ -245,13 +263,13 @@ impl<'a> Input<'a> for StringInput<'a> { #[inline] fn is_str(&self, s: &str) -> bool { - self.as_str().starts_with(s) + self.remaining.starts_with(s) } #[inline] fn eat_byte(&mut self, c: u8) -> bool { if self.is_byte(c) { - self.iter.next(); + self.remaining = unsafe { self.remaining.get_unchecked(1..) }; self.last_pos = self.last_pos + BytePos(1_u32); true } else { @@ -261,9 +279,14 @@ impl<'a> Input<'a> for StringInput<'a> { } pub trait Input<'a>: Clone { - fn cur(&self) -> Option; - fn peek(&self) -> Option; - fn peek_ahead(&self) -> Option; + /// Returns the current byte. Returns [None] if at end of input. + fn cur(&self) -> Option; + + /// Returns the next byte without consuming the current byte. 
+ fn peek(&self) -> Option; + + /// Returns the byte after the next byte without consuming anything. + fn peek_ahead(&self) -> Option; /// # Safety /// @@ -271,18 +294,31 @@ pub trait Input<'a>: Clone { /// when the Input is not empty. unsafe fn bump(&mut self); - /// Returns [None] if it's end of input **or** current character is not an - /// ascii character. + /// Advances the input by `n` bytes. This is more efficient than calling + /// `bump()` when you already know the number of bytes to advance (e.g., + /// when you've just decoded a UTF-8 character and know its length). + /// + /// # Safety + /// + /// - The caller must ensure that `n` bytes are available in the input + /// - `n` must not exceed the actual number of bytes remaining + /// - For UTF-8 inputs, `n` must align with character boundaries + fn bump_bytes(&mut self, n: usize); + + /// Returns the current byte as ASCII if it's valid ASCII (0x00-0x7F). + /// Returns [None] if it's end of input or if the byte is not ASCII. #[inline] fn cur_as_ascii(&self) -> Option { - self.cur().and_then(|i| { - if i.is_ascii() { - return Some(i as u8); - } - None - }) + self.cur() + .and_then(|b| if b <= 0x7f { Some(b) } else { None }) } + /// Returns the current position as a UTF-8 char for cases where we need + /// full character processing (identifiers, strings, etc). + /// Returns [None] if at end of input or if the bytes don't form valid + /// UTF-8. + fn cur_as_char(&self) -> Option; + fn is_at_start(&self) -> bool; fn cur_pos(&self) -> BytePos; @@ -306,16 +342,12 @@ pub trait Input<'a>: Clone { /// - `to` be in the valid range of input. unsafe fn reset_to(&mut self, to: BytePos); - /// Implementors can override the method to make it faster. - /// - /// `c` must be ASCII. + /// Check if the current byte equals the given byte. + /// `c` should typically be an ASCII byte for performance. 
#[inline] #[allow(clippy::wrong_self_convention)] fn is_byte(&self, c: u8) -> bool { - match self.cur() { - Some(ch) => ch == c as char, - _ => false, - } + self.cur() == Some(c) } /// Implementors can override the method to make it faster. @@ -360,12 +392,12 @@ mod tests { with_test_sess("foo/d", |mut i| { assert_eq!(unsafe { i.slice(BytePos(1), BytePos(2)) }, "f"); assert_eq!(i.last_pos, BytePos(2)); - assert_eq!(i.cur(), Some('o')); + assert_eq!(i.cur(), Some(b'o')); assert_eq!(unsafe { i.slice(BytePos(2), BytePos(4)) }, "oo"); assert_eq!(unsafe { i.slice(BytePos(1), BytePos(4)) }, "foo"); assert_eq!(i.last_pos, BytePos(4)); - assert_eq!(i.cur(), Some('/')); + assert_eq!(i.cur(), Some(b'/')); }); } @@ -374,10 +406,10 @@ mod tests { with_test_sess("load", |mut i| { assert_eq!(unsafe { i.slice(BytePos(1), BytePos(3)) }, "lo"); assert_eq!(i.last_pos, BytePos(3)); - assert_eq!(i.cur(), Some('a')); + assert_eq!(i.cur(), Some(b'a')); unsafe { i.reset_to(BytePos(1)) }; - assert_eq!(i.cur(), Some('l')); + assert_eq!(i.cur(), Some(b'l')); assert_eq!(i.last_pos, BytePos(1)); }); } @@ -391,13 +423,13 @@ mod tests { // assert_eq!(i.cur_pos(), BytePos(4)); assert_eq!(i.last_pos, BytePos(4)); - assert_eq!(i.cur(), Some('/')); + assert_eq!(i.cur(), Some(b'/')); unsafe { i.bump(); } assert_eq!(i.last_pos, BytePos(5)); - assert_eq!(i.cur(), Some('d')); + assert_eq!(i.cur(), Some(b'd')); unsafe { i.bump(); diff --git a/crates/swc_css_parser/src/lexer/mod.rs b/crates/swc_css_parser/src/lexer/mod.rs index 904633584aab..2d00990a5ebf 100644 --- a/crates/swc_css_parser/src/lexer/mod.rs +++ b/crates/swc_css_parser/src/lexer/mod.rs @@ -26,7 +26,7 @@ where comments: Option<&'a dyn Comments>, pending_leading_comments: Vec, input: I, - cur: Option, + cur: Option, cur_pos: BytePos, start_pos: BytePos, /// Used to override last_pos @@ -172,7 +172,11 @@ where loop { self.read_comments(); - if self.input.uncons_while(is_whitespace).is_empty() { + if self + .input + .uncons_while(|c| 
is_whitespace(c as u8)) + .is_empty() + { break; } } @@ -190,36 +194,42 @@ where I: Input<'a>, { #[inline(always)] - fn cur(&mut self) -> Option { + fn cur(&mut self) -> Option { self.cur } #[inline(always)] - fn next(&mut self) -> Option { + fn next(&mut self) -> Option { self.input.cur() } #[inline(always)] - fn next_next(&mut self) -> Option { + fn next_next(&mut self) -> Option { self.input.peek() } #[inline(always)] - fn next_next_next(&mut self) -> Option { + fn next_next_next(&mut self) -> Option { self.input.peek_ahead() } #[inline(always)] - fn consume(&mut self) -> Option { + fn consume(&mut self) -> Option { let cur = self.input.cur(); self.cur = cur; self.cur_pos = self.input.last_pos(); - if cur.is_some() { - unsafe { - // Safety: cur is Some - self.input.bump(); + if let Some(c) = cur { + // Optimize: use bump_bytes directly for ASCII (fast path) + if c < 0x80 { + self.input.bump_bytes(1); + } else { + // Non-ASCII: use bump() which calculates UTF-8 length + unsafe { + // Safety: cur is Some + self.input.bump(); + } } } @@ -258,7 +268,7 @@ where // whitespace // Consume as much whitespace as possible. Return a . Some(c) if is_whitespace(c) => self.with_buf(|l, buf| { - buf.push(c); + buf.push(c as char); loop { let c = l.next(); @@ -267,7 +277,7 @@ where Some(c) if is_whitespace(c) => { l.consume(); - buf.push(c); + buf.push(c as char); } _ => { break; @@ -281,9 +291,9 @@ where }), // U+0022 QUOTATION MARK (") // Consume a string token and return it. - Some('"') => self.read_str(None), + Some(b'"') => self.read_str(None), // U+0023 NUMBER SIGN (#) - Some('#') => { + Some(b'#') => { let first = self.next(); let second = self.next_next(); @@ -311,19 +321,21 @@ where }); } - Ok(Token::Delim { value: '#' }) + Ok(Token::Delim { + value: b'#' as char, + }) } // U+0027 APOSTROPHE (') // Consume a string token and return it. - Some('\'') => self.read_str(None), + Some(b'\'') => self.read_str(None), // U+0028 LEFT PARENTHESIS (() // Return a <(-token>. 
- Some('(') => Ok(tok!("(")), + Some(b'(') => Ok(tok!("(")), // U+0029 RIGHT PARENTHESIS ()) // Return a <)-token>. - Some(')') => Ok(tok!(")")), + Some(b')') => Ok(tok!(")")), // U+002B PLUS SIGN (+) - Some('+') => { + Some(b'+') => { // If the input stream starts with a number, reconsume the current input code // point, consume a numeric token and return it. if self.would_start_number(None, None, None) { @@ -338,9 +350,9 @@ where } // U+002C COMMA (,) // Return a . - Some(',') => Ok(tok!(",")), + Some(b',') => Ok(tok!(",")), // U+002D HYPHEN-MINUS (-) - Some('-') => { + Some(b'-') => { // If the input stream starts with a number, reconsume the current input code // point, consume a numeric token, and return it. if self.would_start_number(None, None, None) { @@ -350,7 +362,7 @@ where } // Otherwise, if the next 2 input code points are U+002D HYPHEN-MINUS U+003E // GREATER-THAN SIGN (->), consume them and return a . - else if self.next() == Some('-') && self.next_next() == Some('>') { + else if self.next() == Some(b'-') && self.next_next() == Some(b'>') { self.consume(); self.consume(); @@ -369,7 +381,7 @@ where Ok(tok!("-")) } // U+002E FULL STOP (.) - Some('.') => { + Some(b'.') => { // If the input stream starts with a number, reconsume the current input code // point, consume a numeric token, and return it. if self.would_start_number(None, None, None) { @@ -384,18 +396,18 @@ where } // U+003A COLON (:) // Return a . - Some(':') => Ok(tok!(":")), + Some(b':') => Ok(tok!(":")), // U+003B SEMICOLON (;) // Return a . - Some(';') => Ok(tok!(";")), + Some(b';') => Ok(tok!(";")), // U+003C LESS-THAN SIGN (<) - Some('<') => { + Some(b'<') => { // If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D // HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), consume them and return a // . 
- if self.next() == Some('!') - && self.next_next() == Some('-') - && self.next_next_next() == Some('-') + if self.next() == Some(b'!') + && self.next_next() == Some(b'-') + && self.next_next_next() == Some(b'-') { self.consume(); // ! self.consume(); // - @@ -409,7 +421,7 @@ where Ok(tok!("<")) } // U+0040 COMMERCIAL AT (@) - Some('@') => { + Some(b'@') => { let first = self.next(); let second = self.next_next(); let third = self.next_next_next(); @@ -428,13 +440,15 @@ where // Otherwise, return a with its value set to the current input // code point. - Ok(Token::Delim { value: '@' }) + Ok(Token::Delim { + value: b'@' as char, + }) } // U+005B LEFT SQUARE BRACKET ([) // Return a <[-token>. - Some('[') => Ok(tok!("[")), + Some(b'[') => Ok(tok!("[")), // U+005C REVERSE SOLIDUS (\) - Some('\\') => { + Some(b'\\') => { // If the input stream starts with a valid escape, reconsume the current input // code point, consume an ident-like token, and return it. if self.is_valid_escape(None, None) { @@ -447,20 +461,22 @@ where // to the current input code point. self.emit_error(ErrorKind::InvalidEscape); - Ok(Token::Delim { value: '\\' }) + Ok(Token::Delim { + value: b'\\' as char, + }) } // U+005D RIGHT SQUARE BRACKET (]) // Return a <]-token>. - Some(']') => Ok(tok!("]")), + Some(b']') => Ok(tok!("]")), // U+007B LEFT CURLY BRACKET ({) // Return a <{-token>. - Some('{') => Ok(tok!("{")), + Some(b'{') => Ok(tok!("{")), // U+007D RIGHT CURLY BRACKET (}) // Return a <}-token>. - Some('}') => Ok(tok!("}")), + Some(b'}') => Ok(tok!("}")), // digit // Reconsume the current input code point, consume a numeric token, and return it. - Some('0'..='9') => { + Some(b'0'..=b'9') => { self.reconsume(); self.read_numeric() @@ -477,7 +493,7 @@ where None => Err(ErrorKind::Eof), // anything else // Return a with its value set to the current input code point. 
- Some(c) => Ok(Token::Delim { value: c }), + Some(c) => Ok(Token::Delim { value: c as char }), } } @@ -490,16 +506,16 @@ where // the first U+002A ASTERISK (*) followed by a U+002F SOLIDUS (/), or up to an // EOF code point. Return to the start of this step. // NOTE: We allow to parse line comments under the option. - if self.next() == Some('/') && self.next_next() == Some('*') { + if self.next() == Some(b'/') && self.next_next() == Some(b'*') { let cmt_start = self.input.last_pos(); - while self.next() == Some('/') && self.next_next() == Some('*') { + while self.next() == Some(b'/') && self.next_next() == Some(b'*') { self.consume(); // '*' self.consume(); // '/' loop { match self.consume() { - Some('*') if self.next() == Some('/') => { + Some(b'*') if self.next() == Some(b'/') => { self.consume(); // '/' if self.comments.is_some() { @@ -532,10 +548,10 @@ where } } } else if self.config.allow_wrong_line_comments - && self.next() == Some('/') - && self.next_next() == Some('/') + && self.next() == Some(b'/') + && self.next_next() == Some(b'/') { - while self.next() == Some('/') && self.next_next() == Some('/') { + while self.next() == Some(b'/') && self.next_next() == Some(b'/') { self.consume(); // '/' self.consume(); // '/' @@ -600,7 +616,7 @@ where } // Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it. Create // a with the same value as number, and return it. - else if next_first == Some('%') { + else if next_first == Some(b'%') { self.consume(); return Ok(Token::Percentage { @@ -625,9 +641,9 @@ where // Consume a name, and let string be the result. let ident_sequence = self.read_ident_sequence()?; - // If string’s value is an ASCII case-insensitive match for "url", and the next + // If string's value is an ASCII case-insensitive match for "url", and the next // input code point is U+0028 LEFT PARENTHESIS ((), consume it. 
- if matches_eq_ignore_ascii_case!(ident_sequence.0, "url") && self.next() == Some('(') { + if matches_eq_ignore_ascii_case!(ident_sequence.0, "url") && self.next() == Some(b'(') { self.consume(); let start_whitespace = self.input.last_pos(); @@ -639,7 +655,7 @@ where if is_whitespace(next) && is_whitespace(next_next) { l.consume(); - buf.push(next); + buf.push(next as char); } else { break; } @@ -655,7 +671,7 @@ where // return it. Some(c) if is_whitespace(c) - && (self.next_next() == Some('"') || self.next_next() == Some('\'')) => + && (self.next_next() == Some(b'"') || self.next_next() == Some(b'\'')) => { // Override last position because we consumed whitespaces, but they // should not be part of token @@ -666,7 +682,7 @@ where raw: ident_sequence.1, }); } - Some('"' | '\'') => { + Some(b'"' | b'\'') => { return Ok(Token::Function { value: ident_sequence.0, raw: ident_sequence.1, @@ -680,7 +696,7 @@ where } // Otherwise, if the next input code point is U+0028 LEFT PARENTHESIS ((), consume it. // Create a with its value set to string and return it. - else if self.next() == Some('(') { + else if self.next() == Some(b'(') { self.consume(); return Ok(Token::Function { @@ -699,7 +715,7 @@ where // This section describes how to consume a string token from a stream of code // points. It returns either a or . - fn read_str(&mut self, maybe_ending_code_point: Option) -> LexResult { + fn read_str(&mut self, maybe_ending_code_point: Option) -> LexResult { self.with_buf_and_raw_buf(|l, buf, raw| { // This algorithm may be called with an ending code point, which denotes the // code point that ends the string. If an ending code point is not specified, @@ -709,15 +725,27 @@ where // Initially create a with its value set to the empty string. 
// Done above - raw.push(ending_code_point.unwrap()); + raw.push(ending_code_point.unwrap() as char); // Repeatedly consume the next input code point from the stream: loop { + // Get the full character before consuming (for non-ASCII) + let cur_byte = l.input.cur(); + let cur_char = if let Some(b) = cur_byte { + if is_non_ascii(b) { + l.input.cur_as_char() + } else { + Some(b as char) + } + } else { + None + }; + match l.consume() { // ending code point // Return the . Some(c) if c == ending_code_point.unwrap() => { - raw.push(c); + raw.push(c as char); break; } @@ -746,7 +774,7 @@ where } // U+005C REVERSE SOLIDUS (\) - Some(c) if c == '\\' => { + Some(c) if c == b'\\' => { let next = l.next(); // If the next input code point is EOF, do nothing. @@ -757,26 +785,28 @@ where else if l.next().is_some() && is_newline(l.next().unwrap()) { l.consume(); - raw.push(c); - raw.push(next.unwrap()); + raw.push(c as char); + raw.push(next.unwrap() as char); } // Otherwise, (the stream starts with a valid escape) consume an escaped // code point and append the returned code point to - // the ’s value. + // the 's value. else if l.is_valid_escape(None, None) { let escape = l.read_escape()?; buf.push(escape.0); - raw.push(c); + raw.push(c as char); raw.push_str(&escape.1); } } // Anything else - // Append the current input code point to the ’s value. - Some(c) => { - buf.push(c); - raw.push(c); + // Append the current input code point to the 's value. + Some(_) => { + if let Some(ch) = cur_char { + buf.push(ch); + raw.push(ch); + } } } } @@ -798,9 +828,15 @@ where // Consume as much whitespace as possible. 
while let Some(c) = l.next() { if is_whitespace(c) { + // Get char before consuming + let ch = if is_non_ascii(c) { + l.input.cur_as_char().unwrap_or(c as char) + } else { + c as char + }; l.consume(); - raw.push(c); + raw.push(ch); } else { break; } @@ -808,10 +844,22 @@ where // Repeatedly consume the next input code point from the stream: loop { + // Get the full character before consuming (for non-ASCII) + let cur_byte = l.input.cur(); + let cur_char = if let Some(b) = cur_byte { + if is_non_ascii(b) { + l.input.cur_as_char() + } else { + Some(b as char) + } + } else { + None + }; + match l.consume() { // U+0029 RIGHT PARENTHESIS ()) // Return the . - Some(')') => { + Some(b')') => { return Ok(Token::Url { value: l.atoms.atom(&**out), raw: Box::new(UrlKeyValue(name.1, l.atoms.atom(&**raw))), @@ -833,13 +881,21 @@ where Some(c) if is_whitespace(c) => { // Consume as much whitespace as possible. let whitespaces: String = l.with_sub_buf(|l, buf| { - buf.push(c); + if let Some(ch) = cur_char { + buf.push(ch); + } while let Some(c) = l.next() { if is_whitespace(c) { + // Get char before consuming + let ch = if is_non_ascii(c) { + l.input.cur_as_char().unwrap_or(c as char) + } else { + c as char + }; l.consume(); - buf.push(c); + buf.push(ch); } else { break; } @@ -852,7 +908,7 @@ where // consume it and return the (if EOF was // encountered, this is a parse error); match l.next() { - Some(')') => { + Some(b')') => { l.consume(); raw.push_str(&whitespaces); @@ -894,12 +950,12 @@ where // non-printable code point // This is a parse error. Consume the remnants of a bad url, create a // , and return it. 
- Some(c) if c == '"' || c == '\'' || c == '(' || is_non_printable(c) => { + Some(c) if c == b'"' || c == b'\'' || c == b'(' || is_non_printable(c) => { l.emit_error(ErrorKind::UnexpectedCharInUrl); let remnants = l.read_bad_url_remnants()?; - raw.push(c); + raw.push(c as char); raw.push_str(&remnants); return Ok(Token::BadUrl { @@ -908,15 +964,15 @@ where } // U+005C REVERSE SOLIDUS (\) - Some(c) if c == '\\' => { + Some(c) if c == b'\\' => { // If the stream starts with a valid escape, consume an escaped code point // and append the returned code point to the - // ’s value. + // 's value. if l.is_valid_escape(None, None) { let escaped = l.read_escape()?; out.push(escaped.0); - raw.push(c); + raw.push(c as char); raw.push_str(&escaped.1); } // Otherwise, this is a parse error. Consume the remnants of a bad url, @@ -926,7 +982,7 @@ where let remnants = l.read_bad_url_remnants()?; - raw.push(c); + raw.push(c as char); raw.push_str(&remnants); return Ok(Token::BadUrl { @@ -936,10 +992,12 @@ where } // anything else - // Append the current input code point to the ’s value. - Some(c) => { - out.push(c); - raw.push(c); + // Append the current input code point to the 's value. + Some(_) => { + if let Some(ch) = cur_char { + out.push(ch); + raw.push(ch); + } } } } @@ -953,26 +1011,38 @@ where // will return a code point. fn read_escape(&mut self) -> LexResult<(char, String)> { self.with_sub_buf(|l, buf| { + // Get the full character before consuming (for non-ASCII) + let cur_byte = l.input.cur(); + let cur_char = if let Some(b) = cur_byte { + if is_non_ascii(b) { + l.input.cur_as_char() + } else { + Some(b as char) + } + } else { + None + }; + // Consume the next input code point. match l.consume() { // hex digit Some(c) if is_hex_digit(c) => { - let mut hex = c.to_digit(16).unwrap(); + let mut hex = (c as char).to_digit(16).unwrap(); - buf.push(c); + buf.push(c as char); // Consume as many hex digits as possible, but no more than 5. 
// Note that this means 1-6 hex digits have been consumed in total. for _ in 0..5 { let next = l.next(); - let digit = match next.and_then(|c| c.to_digit(16)) { + let digit = match next.and_then(|c| (c as char).to_digit(16)) { Some(v) => v, None => break, }; l.consume(); - buf.push(next.unwrap()); + buf.push(next.unwrap() as char); hex = hex * 16 + digit; } @@ -983,7 +1053,7 @@ where if is_whitespace(next) { l.consume(); - buf.push(next); + buf.push(next as char); } } @@ -1017,9 +1087,10 @@ where // anything else // Return the current input code point. Some(c) => { - buf.push(c); + let ch = cur_char.unwrap_or(c as char); + buf.push(ch); - Ok((c, (&**buf).into())) + Ok((ch, (&**buf).into())) } } }) @@ -1031,9 +1102,9 @@ where // or can be called with the input stream itself. In the latter case, the two // code points in question are the current input code point and the next input // code point, in that order. - fn is_valid_escape(&mut self, maybe_first: Option, maybe_second: Option) -> bool { + fn is_valid_escape(&mut self, maybe_first: Option, maybe_second: Option) -> bool { // If the first code point is not U+005C REVERSE SOLIDUS (\), return false. - if maybe_first.or_else(|| self.cur()) != Some('\\') { + if maybe_first.or_else(|| self.cur()) != Some(b'\\') { return false; } @@ -1053,16 +1124,16 @@ where // the next two input code points, in that order. fn would_start_ident( &mut self, - maybe_first: Option, - maybe_second: Option, - maybe_third: Option, + maybe_first: Option, + maybe_second: Option, + maybe_third: Option, ) -> bool { // Look at the first code point: let first = maybe_first.or_else(|| self.cur()); match first { // U+002D HYPHEN-MINUS - Some('-') => { + Some(b'-') => { let second = maybe_second.or_else(|| self.next()); match second { @@ -1071,7 +1142,7 @@ where Some(c) if is_name_start(c) => true, // or a U+002D HYPHEN-MINUS, // return true. 
- Some('-') => true, + Some(b'-') => true, // or the second and third code points are a valid escape // return true. Some(_) => { @@ -1089,7 +1160,7 @@ where // U+005C REVERSE SOLIDUS (\) // If the first and second code points are a valid escape, return true. Otherwise, // return false. - Some('\\') => { + Some(b'\\') => { let second = maybe_second.or_else(|| self.next()); self.is_valid_escape(first, second) @@ -1107,9 +1178,9 @@ where #[allow(clippy::needless_return)] fn would_start_number( &mut self, - maybe_first: Option, - maybe_second: Option, - maybe_third: Option, + maybe_first: Option, + maybe_second: Option, + maybe_third: Option, ) -> bool { // Look at the first code point: let first = maybe_first.or_else(|| self.cur()); @@ -1117,13 +1188,13 @@ where match first { // U+002B PLUS SIGN (+) // U+002D HYPHEN-MINUS (-) - Some('+') | Some('-') => { + Some(b'+') | Some(b'-') => { match maybe_second.or_else(|| self.next()) { // If the second code point is a digit, return true. Some(second) if second.is_ascii_digit() => return true, // Otherwise, if the second code point is a U+002E FULL STOP (.) and the // third code point is a digit, return true. - Some('.') => { + Some(b'.') => { if let Some(third) = maybe_third.or_else(|| self.next_next()) { if third.is_ascii_digit() { return true; @@ -1137,7 +1208,7 @@ where }; } // U+002E FULL STOP (.) - Some('.') => { + Some(b'.') => { // If the second code point is a digit, return true. if let Some(second) = self.next() { if second.is_ascii_digit() { @@ -1168,20 +1239,37 @@ where // Repeatedly consume the next input code point from the stream: loop { - match l.consume() { + // For non-ASCII bytes, we need to get the full UTF-8 character before consuming + let cur_byte = l.input.cur(); + let cur_char = if let Some(b) = cur_byte { + if is_non_ascii(b) { + l.input.cur_as_char() + } else { + Some(b as char) + } + } else { + None + }; + + let c = l.consume(); + + match c { // name code point // Append the code point to result. 
- Some(c) if is_name(c) => { - buf.push(c); - raw.push(c); + Some(byte) if is_name(byte) => { + // Use the full character we got earlier + if let Some(ch) = cur_char { + buf.push(ch); + raw.push(ch); + } } // the stream starts with a valid escape // Consume an escaped code point. Append the returned code point to result. - Some(c) if l.is_valid_escape(None, None) => { + Some(byte) if l.is_valid_escape(None, None) => { let escaped = l.read_escape()?; buf.push(escaped.0); - raw.push(c); + raw.push(byte as char); raw.push_str(&escaped.1); } // anything else @@ -1209,10 +1297,10 @@ where // (-), consume it and append it to repr. let next = l.next(); - if next == Some('+') || next == Some('-') { + if next == Some(b'+') || next == Some(b'-') { l.consume(); - out.push(next.unwrap()); + out.push(next.unwrap() as char); } // While the next input code point is a digit, consume it and append it to repr. @@ -1220,7 +1308,7 @@ where if c.is_ascii_digit() { l.consume(); - out.push(c); + out.push(c as char); } else { break; } @@ -1230,7 +1318,7 @@ where // then: let next = l.next(); - if next == Some('.') { + if next == Some(b'.') { if let Some(n) = l.next_next() { if n.is_ascii_digit() { // Consume them. @@ -1238,8 +1326,8 @@ where l.consume(); // Append them to repr. - out.push(next.unwrap()); - out.push(n); + out.push(next.unwrap() as char); + out.push(n as char); // Set type to "number". 
type_flag = NumberType::Number; @@ -1250,7 +1338,7 @@ where if c.is_ascii_digit() { l.consume(); - out.push(c); + out.push(c as char); } else { break; } @@ -1264,12 +1352,12 @@ where // (-) or U+002B PLUS SIGN (+), followed by a digit, then: let next = l.next(); - if next == Some('E') || next == Some('e') { + if next == Some(b'E') || next == Some(b'e') { let next_next = l.next_next(); let next_next_next = l.next_next_next(); - if (next_next == Some('-') - || next_next == Some('+') + if (next_next == Some(b'-') + || next_next == Some(b'+') && next_next_next.is_some() && next_next_next.unwrap().is_ascii_digit()) || next_next.is_some() && next_next.unwrap().is_ascii_digit() @@ -1279,8 +1367,8 @@ where l.consume(); // Append them to repr. - out.push(next.unwrap()); - out.push(next_next.unwrap()); + out.push(next.unwrap() as char); + out.push(next_next.unwrap() as char); // Set type to "number". type_flag = NumberType::Number; @@ -1291,7 +1379,7 @@ where if c.is_ascii_digit() { l.consume(); - out.push(c); + out.push(c as char); } else { break; } @@ -1322,12 +1410,24 @@ where self.with_sub_buf(|l, raw| { // Repeatedly consume the next input code point from the stream: loop { + // Get the full character before consuming (for non-ASCII) + let cur_byte = l.input.cur(); + let cur_char = if let Some(b) = cur_byte { + if is_non_ascii(b) { + l.input.cur_as_char() + } else { + Some(b as char) + } + } else { + None + }; + match l.consume() { // U+0029 RIGHT PARENTHESIS ()) // EOF // Return. - Some(c @ ')') => { - raw.push(c); + Some(c @ b')') => { + raw.push(c as char); break; } @@ -1340,13 +1440,15 @@ where // ("\)") to be encountered without ending the . let escaped = l.read_escape()?; - raw.push(c); + raw.push(c as char); raw.push_str(&escaped.1); } // anything else // Do nothing. 
- Some(c) => { - raw.push(c); + Some(_) => { + if let Some(ch) = cur_char { + raw.push(ch); + } } } } @@ -1357,61 +1459,61 @@ where } #[inline(always)] -fn is_digit(c: char) -> bool { +fn is_digit(c: u8) -> bool { c.is_ascii_digit() } #[inline(always)] -fn is_hex_digit(c: char) -> bool { +fn is_hex_digit(c: u8) -> bool { match c { c if is_digit(c) => true, - 'A'..='F' => true, - 'a'..='f' => true, + b'A'..=b'F' => true, + b'a'..=b'f' => true, _ => false, } } #[inline(always)] -fn is_uppercase_letter(c: char) -> bool { +fn is_uppercase_letter(c: u8) -> bool { c.is_ascii_uppercase() } #[inline(always)] -fn is_lowercase_letter(c: char) -> bool { +fn is_lowercase_letter(c: u8) -> bool { c.is_ascii_lowercase() } #[inline(always)] -fn is_letter(c: char) -> bool { +fn is_letter(c: u8) -> bool { is_uppercase_letter(c) || is_lowercase_letter(c) } #[inline(always)] -fn is_non_ascii(c: char) -> bool { - c as u32 >= 0x80 +fn is_non_ascii(c: u8) -> bool { + c >= 0x80 } #[inline(always)] -fn is_name_start(c: char) -> bool { - matches!(c, c if is_letter(c) || is_non_ascii(c) || c == '_' || c == '\x00') +fn is_name_start(c: u8) -> bool { + matches!(c, c if is_letter(c) || is_non_ascii(c) || c == b'_' || c == 0x00) } #[inline(always)] -fn is_name(c: char) -> bool { - is_name_start(c) || matches!(c, c if c.is_ascii_digit() || c == '-') +fn is_name(c: u8) -> bool { + is_name_start(c) || matches!(c, c if c.is_ascii_digit() || c == b'-') } #[inline(always)] -fn is_non_printable(c: char) -> bool { - matches!(c, '\x00'..='\x08' | '\x0B' | '\x0E'..='\x1F' | '\x7F') +fn is_non_printable(c: u8) -> bool { + matches!(c, 0x00..=0x08 | 0x0B | 0x0E..=0x1F | 0x7F) } #[inline(always)] -fn is_newline(c: char) -> bool { - matches!(c, '\n' | '\r' | '\x0C') +fn is_newline(c: u8) -> bool { + matches!(c, b'\n' | b'\r' | 0x0c) } #[inline(always)] -fn is_whitespace(c: char) -> bool { - matches!(c, c if c == ' ' || c == '\t' || is_newline(c)) +fn is_whitespace(c: u8) -> bool { + matches!(c, c if c == b' ' 
|| c == b'\t' || is_newline(c)) } diff --git a/crates/swc_ecma_lexer/src/common/lexer/char.rs b/crates/swc_ecma_lexer/src/common/lexer/char.rs index 705a3fd05f70..62f4e4d08a49 100644 --- a/crates/swc_ecma_lexer/src/common/lexer/char.rs +++ b/crates/swc_ecma_lexer/src/common/lexer/char.rs @@ -1,8 +1,8 @@ -/// Implemented for `char`. +/// Implemented for `u8` - operates on bytes for performance. pub trait CharExt: Copy { fn to_char(self) -> Option; - /// Test whether a given character code starts an identifier. + /// Test whether a given byte/character starts an identifier. /// /// https://tc39.github.io/ecma262/#prod-IdentifierStart #[inline] @@ -14,7 +14,7 @@ pub trait CharExt: Copy { swc_ecma_ast::Ident::is_valid_start(c) } - /// Test whether a given character is part of an identifier. + /// Test whether a given byte/character is part of an identifier. #[inline] fn is_ident_part(self) -> bool { let c = match self.to_char() { @@ -65,6 +65,20 @@ pub trait CharExt: Copy { } } +impl CharExt for u8 { + #[inline(always)] + fn to_char(self) -> Option { + // For ASCII bytes, this is a fast path + if self <= 0x7f { + Some(self as char) + } else { + // For non-ASCII bytes, we can't convert a single byte to a char + // The caller should use cur_as_char() on the Input trait instead + None + } + } +} + impl CharExt for char { #[inline(always)] fn to_char(self) -> Option { diff --git a/crates/swc_ecma_lexer/src/common/lexer/mod.rs b/crates/swc_ecma_lexer/src/common/lexer/mod.rs index 852877860f3f..3bc438454779 100644 --- a/crates/swc_ecma_lexer/src/common/lexer/mod.rs +++ b/crates/swc_ecma_lexer/src/common/lexer/mod.rs @@ -175,20 +175,25 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } #[inline(always)] - fn cur(&self) -> Option { + fn cur(&self) -> Option { self.input().cur() } #[inline(always)] - fn peek(&self) -> Option { + fn peek(&self) -> Option { self.input().peek() } #[inline(always)] - fn peek_ahead(&self) -> Option { + fn peek_ahead(&self) -> Option { 
self.input().peek_ahead() } + #[inline(always)] + fn cur_as_char(&self) -> Option { + self.input().cur_as_char() + } + #[inline(always)] fn cur_pos(&self) -> BytePos { self.input().cur_pos() @@ -364,8 +369,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { fn skip_block_comment(&mut self) { let start = self.cur_pos(); - debug_assert_eq!(self.cur(), Some('/')); - debug_assert_eq!(self.peek(), Some('*')); + debug_assert_eq!(self.cur(), Some(b'/')); + debug_assert_eq!(self.peek(), Some(b'*')); // Consume initial "/*" self.input_mut().bump_bytes(2); @@ -417,7 +422,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { match matched_byte { b'*' => { - if self.peek() == Some('/') { + if self.peek() == Some(b'/') { // Consume "*/" self.input_mut().bump_bytes(2); @@ -476,13 +481,13 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { b'\r' => { should_mark_had_line_break = true; self.bump(); - if self.peek() == Some('\n') { + if self.peek() == Some(b'\n') { self.bump(); } } _ => { // Unicode line terminator (LS/PS) or other character - if let Some('\u{2028}' | '\u{2029}') = self.cur() { + if let Some('\u{2028}' | '\u{2029}') = self.cur_as_char() { should_mark_had_line_break = true; } self.bump(); @@ -516,10 +521,10 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { if LEX_COMMENTS && self.input().is_byte(b'/') { if let Some(c) = self.peek() { - if c == '/' { + if c == b'/' { self.skip_line_comment(2); continue; - } else if c == '*' { + } else if c == b'*' { self.skip_block_comment(); continue; } @@ -575,24 +580,24 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { let mut prev = None; while let Some(c) = self.cur() { - if c == '_' { + if c == b'_' { *has_underscore = true; if allow_num_separator { - let is_allowed = |c: Option| { + let is_allowed = |c: Option| { let Some(c) = c else { return false; }; - c.is_digit(RADIX as _) + (c as char).is_digit(RADIX as _) }; - let is_forbidden = |c: Option| { + let is_forbidden = |c: Option| { let Some(c) = c else { 
return false; }; if RADIX == 16 { - matches!(c, '.' | 'X' | '_' | 'x') + matches!(c, b'.' | b'X' | b'_' | b'x') } else { - matches!(c, '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o') + matches!(c, b'.' | b'B' | b'E' | b'O' | b'_' | b'b' | b'e' | b'o') } }; @@ -616,7 +621,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } // e.g. (val for a) = 10 where radix = 16 - let val = if let Some(val) = c.to_digit(RADIX as _) { + let val = if let Some(val) = (c as char).to_digit(RADIX as _) { val } else { return Ok(total); @@ -692,7 +697,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { let lazy_integer = if START_WITH_DOT { // first char is '.' debug_assert!( - self.cur().is_some_and(|c| c == '.'), + self.cur().is_some_and(|c| c == b'.'), "read_number expects current char to be '.'" ); LazyInteger { @@ -703,7 +708,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } } else { debug_assert!(!START_WITH_DOT); - debug_assert!(!START_WITH_ZERO || self.cur().unwrap() == '0'); + debug_assert!(!START_WITH_ZERO || self.cur().unwrap() == b'0'); // Use read_number_no_dot to support long numbers. let lazy_integer = self.read_number_no_dot_as_str::<10>()?; @@ -771,7 +776,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { has_underscore |= lazy_integer.has_underscore; // At this point, number cannot be an octal literal. - let has_dot = self.cur() == Some('.'); + let has_dot = self.cur() == Some(b'.'); // `0.a`, `08.a`, `102.a` are invalid. 
// // `.1.a`, `.1e-4.a` are valid, @@ -785,7 +790,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { self.read_digits::<_, (), 10>(|_, _, _| Ok(((), true)), true, &mut has_underscore)?; } - let has_e = self.cur().is_some_and(|c| c == 'e' || c == 'E'); + let has_e = self.cur().is_some_and(|c| c == b'e' || c == b'E'); // Handle 'e' and 'E' // // .5e1 = 5 @@ -803,7 +808,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } }; - if next == '+' || next == '-' { + if next == b'+' || next == b'-' { self.bump(); // remove '+', '-' } @@ -875,12 +880,12 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { ); let start = self.cur_pos(); - debug_assert_eq!(self.cur(), Some('0')); + debug_assert_eq!(self.cur(), Some(b'0')); self.bump(); debug_assert!(self .cur() - .is_some_and(|c| matches!(c, 'b' | 'B' | 'o' | 'O' | 'x' | 'X'))); + .is_some_and(|c| matches!(c, b'b' | b'B' | b'o' | b'O' | b'x' | b'X'))); self.bump(); let lazy_integer = self.read_number_no_dot_as_str::()?; @@ -996,14 +1001,14 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { let mut s = SmartString::::default(); - debug_assert!(self.input().cur().is_some_and(|c| c == '&')); + debug_assert!(self.input().cur().is_some_and(|c| c == b'&')); self.bump(); let start_pos = self.input().cur_pos(); for _ in 0..10 { let c = match self.input().cur() { - Some(c) => c, + Some(c) => c as char, None => break, }; self.bump(); @@ -1041,10 +1046,10 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { fn read_jsx_new_line(&mut self, normalize_crlf: bool) -> LexResult> { debug_assert!(self.syntax().jsx()); - let ch = self.input().cur().unwrap(); + let ch = self.input().cur().unwrap() as char; self.bump(); - let out = if ch == '\r' && self.input().cur() == Some('\n') { + let out = if ch == '\r' && self.input().cur() == Some(b'\n') { self.bump(); // `\n` Either::Left(if normalize_crlf { "\n" } else { "\r\n" }) } else { @@ -1064,7 +1069,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { let mut chunk_start = 
self.input().cur_pos(); loop { let ch = match self.input().cur() { - Some(c) => c, + Some(c) => c as char, None => { self.emit_error(start, SyntaxError::UnterminatedStrLit); break; @@ -1185,8 +1190,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { // returned `Some`, and already exited. debug_assert!(high >= MIN_HIGH); let is_pair = high <= MAX_HIGH - && self.input().cur() == Some('\\') - && self.input().peek() == Some('u'); + && self.input().cur() == Some(b'\\') + && self.input().peek() == Some(b'u'); if !is_pair { return Ok(Some(UnicodeEscape::LoneSurrogate(high))); } @@ -1220,7 +1225,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } fn read_unicode_escape(&mut self) -> LexResult { - debug_assert_eq!(self.cur(), Some('u')); + debug_assert_eq!(self.cur(), Some(b'u')); let mut is_curly = false; @@ -1304,7 +1309,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { #[cold] fn read_shebang(&mut self) -> LexResult> { - if self.input().cur() != Some('#') || self.input().peek() != Some('!') { + if self.input().cur() != Some(b'#') || self.input().peek() != Some(b'!') { return Ok(None); } self.bump(); // `#` @@ -1336,11 +1341,11 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { // Handle edge case for immediate template end if start == self.cur_pos() && self.state().last_was_tpl_element() { if let Some(c) = self.cur() { - if c == '$' && self.peek() == Some('{') { + if c == b'$' && self.peek() == Some(b'{') { self.bump(); // '$' self.bump(); // '{' return Ok(Self::Token::DOLLAR_LBRACE); - } else if c == '`' { + } else if c == b'`' { self.bump(); // '`' return Ok(Self::Token::BACKQUOTE); } @@ -1361,7 +1366,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { match matched_byte { b'$' => { // Check if this is ${ - if self.peek() == Some('{') { + if self.peek() == Some(b'{') { // Found template substitution let cooked = if cooked_slice_start == raw_slice_start { let last_pos = self.cur_pos(); @@ -1413,7 +1418,7 @@ pub trait Lexer<'a, TokenAndSpan>: 
Tokens + Sized { // Handle carriage return - consume \r and optionally \n, normalize to \n self.bump(); // '\r' - if self.peek() == Some('\n') { + if self.peek() == Some(b'\n') { self.bump(); // '\n' } @@ -1449,14 +1454,14 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { /// /// In template literal, we should preserve raw string. fn read_escaped_char(&mut self, in_template: bool) -> LexResult> { - debug_assert_eq!(self.cur(), Some('\\')); + debug_assert_eq!(self.cur(), Some(b'\\')); let start = self.cur_pos(); self.bump(); // '\' let c = match self.cur() { - Some(c) => c, + Some(c) => c as char, None => self.error_span(pos_span(start), SyntaxError::InvalidStrEscape)?, }; @@ -1510,7 +1515,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { let first_c = if c == '0' { match self.cur() { - Some(next) if next.is_digit(8) => c, + Some(next) if (next as char).is_digit(8) => c, // \0 is not an octal literal nor decimal literal. _ => return Ok(Some(CodePoint::from_char('\u{0000}'))), } @@ -1531,7 +1536,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { ($check:expr) => {{ let cur = self.cur(); - match cur.and_then(|c| c.to_digit(8)) { + match cur.and_then(|c| (c as char).to_digit(8)) { Some(v) => { value = if $check { let new_val = value @@ -1575,7 +1580,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { self.input_mut().reset_to(start); } - debug_assert_eq!(self.cur(), Some('/')); + debug_assert_eq!(self.cur(), Some(b'/')); let start = self.cur_pos(); @@ -1586,6 +1591,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { let (mut escaped, mut in_class) = (false, false); while let Some(c) = self.cur() { + let c = c as char; // This is ported from babel. // Seems like regexp literal cannot contain linebreak. 
if c.is_line_terminator() { @@ -1780,7 +1786,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { // ASCII but not a valid identifier break; - } else if let Some(c) = self.input().cur() { + } else if let Some(c) = self.input().cur_as_char() { if Ident::is_valid_non_ascii_continue(c) { self.bump(); continue; @@ -1813,14 +1819,14 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { /// `#` fn read_token_number_sign(&mut self) -> LexResult { - debug_assert!(self.cur().is_some_and(|c| c == '#')); + debug_assert!(self.cur().is_some_and(|c| c == b'#')); self.bump(); // '#' // `#` can also be a part of shebangs, however they should have been // handled by `read_shebang()` debug_assert!( - !self.input().is_at_start() || self.cur() != Some('!'), + !self.input().is_at_start() || self.cur() != Some(b'!'), "#! should have already been handled by read_shebang()" ); Ok(Self::Token::HASH) @@ -1831,7 +1837,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { /// This is extracted as a method to reduce size of `read_token`. #[inline(never)] fn read_token_dot(&mut self) -> LexResult { - debug_assert!(self.cur().is_some_and(|c| c == '.')); + debug_assert!(self.cur().is_some_and(|c| c == b'.')); // Check for eof let next = match self.input().peek() { Some(next) => next, @@ -1849,7 +1855,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { self.bump(); // 1st `.` - if next == '.' && self.input().peek() == Some('.') { + if next == b'.' && self.input().peek() == Some(b'.') { self.bump(); // 2nd `.` self.bump(); // 3rd `.` @@ -1864,7 +1870,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { /// This is extracted as a method to reduce size of `read_token`. 
#[inline(never)] fn read_token_question_mark(&mut self) -> LexResult { - debug_assert!(self.cur().is_some_and(|c| c == '?')); + debug_assert!(self.cur().is_some_and(|c| c == b'?')); self.bump(); if self.input_mut().eat_byte(b'?') { if self.input_mut().eat_byte(b'=') { @@ -1882,7 +1888,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { /// This is extracted as a method to reduce size of `read_token`. #[inline(never)] fn read_token_colon(&mut self) -> LexResult { - debug_assert!(self.cur().is_some_and(|c| c == ':')); + debug_assert!(self.cur().is_some_and(|c| c == b':')); self.bump(); // ':' Ok(Self::Token::COLON) } @@ -1892,13 +1898,13 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { /// This is extracted as a method to reduce size of `read_token`. #[inline(never)] fn read_token_zero(&mut self) -> LexResult { - debug_assert_eq!(self.cur(), Some('0')); + debug_assert_eq!(self.cur(), Some(b'0')); let next = self.input().peek(); let bigint = match next { - Some('x') | Some('X') => self.read_radix_number::<16>(), - Some('o') | Some('O') => self.read_radix_number::<8>(), - Some('b') | Some('B') => self.read_radix_number::<2>(), + Some(b'x') | Some(b'X') => self.read_radix_number::<16>(), + Some(b'o') | Some(b'O') => self.read_radix_number::<8>(), + Some(b'b') | Some(b'B') => self.read_radix_number::<2>(), _ => { return self.read_number::().map(|v| match v { Left((value, raw)) => Self::Token::num(value, raw, self), @@ -1944,13 +1950,13 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { } // '||', '&&' - if self.input().cur() == Some(C as char) { + if self.input().cur() == Some(C) { unsafe { // Safety: cur() is Some(c) self.input_mut().bump(); } - if self.input().cur() == Some('=') { + if self.input().cur() == Some(b'=') { unsafe { // Safety: cur() is Some('=') self.input_mut().bump(); @@ -1990,7 +1996,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { /// This is extracted as a method to reduce size of `read_token`. 
#[inline(never)] fn read_token_mul_mod(&mut self, is_mul: bool) -> LexResult { - debug_assert!(self.cur().is_some_and(|c| c == '*' || c == '%')); + debug_assert!(self.cur().is_some_and(|c| c == b'*' || c == b'%')); self.bump(); let token = if is_mul { if self.input_mut().eat_byte(b'*') { @@ -2019,7 +2025,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { #[inline(never)] fn read_slash(&mut self) -> LexResult { - debug_assert_eq!(self.cur(), Some('/')); + debug_assert_eq!(self.cur(), Some(b'/')); self.bump(); // '/' Ok(if self.eat(b'=') { Self::Token::DIV_EQ @@ -2047,9 +2053,9 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { /// See https://tc39.github.io/ecma262/#sec-literals-string-literals // TODO: merge `read_str_lit` and `read_jsx_str` fn read_str_lit(&mut self) -> LexResult { - debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"')); + debug_assert!(self.cur() == Some(b'\'') || self.cur() == Some(b'"')); let start = self.cur_pos(); - let quote = self.cur().unwrap() as u8; + let quote = self.cur().unwrap(); self.bump(); // '"' or '\'' diff --git a/crates/swc_ecma_lexer/src/lexer/jsx.rs b/crates/swc_ecma_lexer/src/lexer/jsx.rs index 5fbc92266b80..82825c00459f 100644 --- a/crates/swc_ecma_lexer/src/lexer/jsx.rs +++ b/crates/swc_ecma_lexer/src/lexer/jsx.rs @@ -12,7 +12,7 @@ impl Lexer<'_> { loop { let cur = match self.input.cur() { - Some(c) => c, + Some(c) => c as char, None => { let start = self.state.start; self.error(start, SyntaxError::UnterminatedJSXContents)? diff --git a/crates/swc_ecma_lexer/src/lexer/mod.rs b/crates/swc_ecma_lexer/src/lexer/mod.rs index 1f245fc57a0c..49e5be16bfc5 100644 --- a/crates/swc_ecma_lexer/src/lexer/mod.rs +++ b/crates/swc_ecma_lexer/src/lexer/mod.rs @@ -176,7 +176,7 @@ impl<'a> Lexer<'a> { } // '++', '--' - Ok(if self.input.cur() == Some(C as char) { + Ok(if self.input.cur() == Some(C) { unsafe { // Safety: cur() is Some(c) self.input.bump(); @@ -273,7 +273,10 @@ impl Lexer<'_> { } // XML style comment. 
`")); } // U+0021 EXCLAMATION MARK (!) // Switch to the comment end bang state. - Some('!') => { + Some(b'!') => { self.state = State::CommentEndBang; } // U+002D HYPHEN-MINUS (-) // Append a U+002D HYPHEN-MINUS character (-) to the comment token's data. - Some(c @ '-') => { - self.append_to_comment_token(c, c); + Some(c @ b'-') => { + self.append_to_comment_token(c as char, c as char); } // EOF // This is an eof-in-comment parse error. Emit the current comment token. @@ -3275,16 +3349,16 @@ where // Append two U+002D HYPHEN-MINUS characters (-) and a U+0021 EXCLAMATION // MARK character (!) to the comment token's data. Switch to the comment end // dash state. - Some(c @ '-') => { - self.append_to_comment_token(c, c); - self.append_to_comment_token('-', '-'); + Some(c @ b'-') => { + self.append_to_comment_token(c as char, c as char); + self.append_to_comment_token(c as char, c as char); self.append_to_comment_token('!', '!'); self.state = State::CommentEndDash; } // U+003E GREATER-THAN SIGN (>) // This is an incorrectly-closed-comment parse error. Switch to the data // state. Emit the current comment token. - Some('>') => { + Some(b'>') => { self.emit_error(ErrorKind::IncorrectlyClosedComment); self.state = State::Data; self.emit_comment_token(Some(">")); @@ -3326,7 +3400,7 @@ where } // U+003E GREATER-THAN SIGN (>) // Reconsume in the before DOCTYPE name state. - Some('>') => { + Some(b'>') => { self.reconsume_in_state(State::BeforeDoctypeName); } // EOF @@ -3370,14 +3444,14 @@ where Some(c) if is_ascii_upper_alpha(c) => { self.append_raw_to_doctype_token(c); self.create_doctype_token(); - self.set_doctype_token_name(c.to_ascii_lowercase()); + self.set_doctype_token_name(c.to_ascii_lowercase() as char); self.state = State::DoctypeName; } // U+0000 NULL // This is an unexpected-null-character parse error. Create a new DOCTYPE // token. Set the token's name to a U+FFFD REPLACEMENT CHARACTER character. // Switch to the DOCTYPE name state. 
- Some(c @ '\x00') => { + Some(c @ b'\x00') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::UnexpectedNullCharacter); self.create_doctype_token(); @@ -3388,7 +3462,7 @@ where // This is a missing-doctype-name parse error. Create a new DOCTYPE token. // Set its force-quirks flag to on. Switch to the data state. Emit the // current token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::MissingDoctypeName); self.create_doctype_token(); @@ -3416,7 +3490,12 @@ where self.validate_input_stream_character(c); self.append_raw_to_doctype_token(c); self.create_doctype_token(); - self.set_doctype_token_name(c); + let ch = if is_non_ascii(c) { + self.current_char.unwrap_or(c as char) + } else { + c as char + }; + self.set_doctype_token_name(ch); self.state = State::DoctypeName; } } @@ -3437,7 +3516,7 @@ where } // U+003E GREATER-THAN SIGN (>) // Switch to the data state. Emit the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.finish_doctype_token_name(); self.state = State::Data; @@ -3447,15 +3526,18 @@ where // Append the lowercase version of the current input character (add 0x0020 // to the character's code point) to the current DOCTYPE token's name. Some(c) if is_ascii_upper_alpha(c) => { - self.consume_and_append_to_doctype_token_name(c, is_ascii_upper_alpha); + self.consume_and_append_to_doctype_token_name(c, is_ascii_upper_alpha_char); } // U+0000 NULL // This is an unexpected-null-character parse error. Append a U+FFFD // REPLACEMENT CHARACTER character to the current DOCTYPE token's name. 
- Some(c @ '\x00') => { + Some(c @ b'\x00') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::UnexpectedNullCharacter); - self.append_to_doctype_token(Some(REPLACEMENT_CHARACTER), None, None); + + let b = self.buf.clone(); + let mut buf = b.borrow_mut(); + buf.push(REPLACEMENT_CHARACTER); } // EOF // This is an eof-in-doctype parse error. Set the current DOCTYPE token's @@ -3474,12 +3556,14 @@ where // Append the current input character to the current DOCTYPE token's name. Some(c) => { self.validate_input_stream_character(c); - self.consume_and_append_to_doctype_token_name(c, |c| { - if !is_allowed_character(c) { + self.consume_and_append_to_doctype_token_name(c, |ch| { + if !is_allowed_character(ch) { return false; } - !is_spacy(c) && !matches!(c, '>' | '\x00') && !is_ascii_upper_alpha(c) + !is_spacy_char(ch) + && !matches!(ch, '>' | '\x00') + && !is_ascii_upper_alpha_char(ch) }); } } @@ -3500,7 +3584,7 @@ where } // U+003E GREATER-THAN SIGN (>) // Switch to the data state. Emit the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.state = State::Data; self.emit_doctype_token(); @@ -3534,12 +3618,12 @@ where let b = self.buf.clone(); let mut buf = b.borrow_mut(); - buf.push(c); + buf.push(c as char); for _ in 0..5 { match self.consume_next_char() { Some(c) => { - buf.push(c); + buf.push(c as char); } _ => { break; @@ -3602,7 +3686,7 @@ where // Set the current DOCTYPE token's public identifier to the empty string // (not missing), then switch to the DOCTYPE public identifier // (double-quoted) state. - Some(c @ '"') => { + Some(c @ b'"') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypePublicKeyword); self.set_doctype_token_public_id(); @@ -3613,7 +3697,7 @@ where // Set the current DOCTYPE token's public identifier to the empty string // (not missing), then switch to the DOCTYPE public identifier // (single-quoted) state. 
- Some(c @ '\'') => { + Some(c @ b'\'') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypePublicKeyword); self.set_doctype_token_public_id(); @@ -3623,7 +3707,7 @@ where // This is a missing-doctype-public-identifier parse error. Set the current // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::MissingDoctypePublicIdentifier); self.set_doctype_token_force_quirks(); @@ -3669,7 +3753,7 @@ where // Set the current DOCTYPE token's public identifier to the empty string // (not missing), then switch to the DOCTYPE public identifier // (double-quoted) state. - Some(c @ '"') => { + Some(c @ b'"') => { self.append_raw_to_doctype_token(c); self.set_doctype_token_public_id(); self.state = State::DoctypePublicIdentifierDoubleQuoted; @@ -3678,7 +3762,7 @@ where // Set the current DOCTYPE token's public identifier to the empty string // (not missing), then switch to the DOCTYPE public identifier // (single-quoted) state. - Some(c @ '\'') => { + Some(c @ b'\'') => { self.append_raw_to_doctype_token(c); self.set_doctype_token_public_id(); self.state = State::DoctypePublicIdentifierSingleQuoted; @@ -3687,7 +3771,7 @@ where // This is a missing-doctype-public-identifier parse error. Set the current // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::MissingDoctypePublicIdentifier); self.set_doctype_token_force_quirks(); @@ -3723,7 +3807,7 @@ where match self.consume_next_char() { // U+0022 QUOTATION MARK (") // Switch to the after DOCTYPE public identifier state. 
- Some(c @ '"') => { + Some(c @ b'"') => { self.append_raw_to_doctype_token(c); self.finish_doctype_token_public_id(); self.state = State::AfterDoctypePublicIdentifier; @@ -3732,16 +3816,18 @@ where // This is an unexpected-null-character parse error. Append a U+FFFD // REPLACEMENT CHARACTER character to the current DOCTYPE token's public // identifier. - Some(c @ '\x00') => { + Some(c @ b'\x00') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::UnexpectedNullCharacter); - self.append_to_doctype_token(None, Some(REPLACEMENT_CHARACTER), None); + let b = self.buf.clone(); + let mut buf = b.borrow_mut(); + buf.push(REPLACEMENT_CHARACTER); } // U+003E GREATER-THAN SIGN (>) // This is an abrupt-doctype-public-identifier parse error. Set the current // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.finish_doctype_token_public_id(); self.emit_error(ErrorKind::AbruptDoctypePublicIdentifier); @@ -3767,12 +3853,12 @@ where // identifier. Some(c) => { self.validate_input_stream_character(c); - self.consume_and_append_to_doctype_token_public_id(c, |c| { - if !is_allowed_character(c) { + self.consume_and_append_to_doctype_token_public_id(c, |ch| { + if !is_allowed_character(ch) { return false; } - !matches!(c, '"' | '\x00' | '>' | '\r') + !matches!(ch, '"' | '\x00' | '>' | '\r') }); } } @@ -3783,7 +3869,7 @@ where match self.consume_next_char() { // U+0027 APOSTROPHE (') // Switch to the after DOCTYPE public identifier state. - Some(c @ '\'') => { + Some(c @ b'\'') => { self.finish_doctype_token_public_id(); self.append_raw_to_doctype_token(c); self.state = State::AfterDoctypePublicIdentifier; @@ -3792,16 +3878,18 @@ where // This is an unexpected-null-character parse error. Append a U+FFFD // REPLACEMENT CHARACTER character to the current DOCTYPE token's public // identifier. 
- Some(c @ '\x00') => { + Some(c @ b'\x00') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::UnexpectedNullCharacter); - self.append_to_doctype_token(None, Some(REPLACEMENT_CHARACTER), None); + let b = self.buf.clone(); + let mut buf = b.borrow_mut(); + buf.push(REPLACEMENT_CHARACTER); } // U+003E GREATER-THAN SIGN (>) // This is an abrupt-doctype-public-identifier parse error. Set the current // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.finish_doctype_token_public_id(); self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::AbruptDoctypePublicIdentifier); @@ -3827,12 +3915,12 @@ where // identifier. Some(c) => { self.validate_input_stream_character(c); - self.consume_and_append_to_doctype_token_public_id(c, |c| { - if !is_allowed_character(c) { + self.consume_and_append_to_doctype_token_public_id(c, |ch| { + if !is_allowed_character(ch) { return false; } - !matches!(c, '\'' | '\x00' | '>' | '\r') + !matches!(ch, '\'' | '\x00' | '>' | '\r') }); } } @@ -3852,7 +3940,7 @@ where } // U+003E GREATER-THAN SIGN (>) // Switch to the data state. Emit the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.state = State::Data; self.emit_doctype_token(); @@ -3862,7 +3950,7 @@ where // parse error. Set the current DOCTYPE token's system // identifier to the empty string (not missing), then switch // to the DOCTYPE system identifier (double-quoted) state. - Some(c @ '"') => { + Some(c @ b'"') => { self.append_raw_to_doctype_token(c); self.emit_error( ErrorKind::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers, @@ -3875,7 +3963,7 @@ where // parse error. Set the current DOCTYPE token's system // identifier to the empty string (not missing), then switch // to the DOCTYPE system identifier (single-quoted) state. 
- Some(c @ '\'') => { + Some(c @ b'\'') => { self.append_raw_to_doctype_token(c); self.emit_error( ErrorKind::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers, @@ -3920,7 +4008,7 @@ where } // U+003E GREATER-THAN SIGN (>) // Switch to the data state. Emit the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.state = State::Data; self.emit_doctype_token(); @@ -3929,7 +4017,7 @@ where // Set the current DOCTYPE token's system identifier to the empty string // (not missing), then switch to the DOCTYPE system identifier // (double-quoted) state. - Some(c @ '"') => { + Some(c @ b'"') => { self.append_raw_to_doctype_token(c); self.set_doctype_token_system_id(); self.state = State::DoctypeSystemIdentifierDoubleQuoted; @@ -3938,7 +4026,7 @@ where // Set the current DOCTYPE token's system identifier to the empty string // (not missing), then switch to the DOCTYPE system identifier // (single-quoted) state. - Some(c @ '\'') => { + Some(c @ b'\'') => { self.append_raw_to_doctype_token(c); self.set_doctype_token_system_id(); self.state = State::DoctypeSystemIdentifierSingleQuoted; @@ -3984,7 +4072,7 @@ where // Set the current DOCTYPE token's system identifier to the empty string // (not missing), then switch to the DOCTYPE system identifier // (double-quoted) state. - Some(c @ '"') => { + Some(c @ b'"') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypeSystemKeyword); self.set_doctype_token_system_id(); @@ -3995,7 +4083,7 @@ where // Set the current DOCTYPE token's system identifier to the empty string // (not missing), then switch to the DOCTYPE system identifier // (single-quoted) state. - Some(c @ '\'') => { + Some(c @ b'\'') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypeSystemKeyword); self.set_doctype_token_system_id(); @@ -4005,7 +4093,7 @@ where // This is a missing-doctype-system-identifier parse error. 
Set the current // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::MissingDoctypeSystemIdentifier); self.set_doctype_token_force_quirks(); @@ -4051,7 +4139,7 @@ where // Set the current DOCTYPE token's system identifier to the empty string // (not missing), then switch to the DOCTYPE system identifier // (double-quoted) state. - Some(c @ '"') => { + Some(c @ b'"') => { self.append_raw_to_doctype_token(c); self.set_doctype_token_system_id(); self.state = State::DoctypeSystemIdentifierDoubleQuoted; @@ -4060,7 +4148,7 @@ where // Set the current DOCTYPE token's system identifier to the empty string // (not missing), then switch to the DOCTYPE system identifier // (single-quoted) state. - Some(c @ '\'') => { + Some(c @ b'\'') => { self.append_raw_to_doctype_token(c); self.set_doctype_token_system_id(); self.state = State::DoctypeSystemIdentifierSingleQuoted; @@ -4069,7 +4157,7 @@ where // This is a missing-doctype-system-identifier parse error. Set the current // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::EofInDoctype); self.set_doctype_token_force_quirks(); @@ -4105,7 +4193,7 @@ where match self.consume_next_char() { // U+0027 APOSTROPHE (') // Switch to the after DOCTYPE system identifier state. - Some(c @ '"') => { + Some(c @ b'"') => { self.finish_doctype_token_system_id(); self.append_raw_to_doctype_token(c); self.state = State::AfterDoctypeSystemIdentifier; @@ -4114,16 +4202,18 @@ where // This is an unexpected-null-character parse error. Append a U+FFFD // REPLACEMENT CHARACTER character to the current DOCTYPE token's system // identifier. 
- Some(c @ '\x00') => { + Some(c @ b'\x00') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::UnexpectedNullCharacter); - self.append_to_doctype_token(None, None, Some(REPLACEMENT_CHARACTER)); + let b = self.buf.clone(); + let mut buf = b.borrow_mut(); + buf.push(REPLACEMENT_CHARACTER); } // U+003E GREATER-THAN SIGN (>) // This is an abrupt-doctype-system-identifier parse error. Set the current // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.finish_doctype_token_system_id(); self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::AbruptDoctypeSystemIdentifier); @@ -4149,12 +4239,12 @@ where // identifier. Some(c) => { self.validate_input_stream_character(c); - self.consume_and_append_to_doctype_token_system_id(c, |c| { - if !is_allowed_character(c) { + self.consume_and_append_to_doctype_token_system_id(c, |ch| { + if !is_allowed_character(ch) { return false; } - !matches!(c, '"' | '\x00' | '>' | '\r') + !matches!(ch, '"' | '\x00' | '>' | '\r') }); } } @@ -4165,7 +4255,7 @@ where match self.consume_next_char() { // U+0027 APOSTROPHE (') // Switch to the after DOCTYPE system identifier state. - Some(c @ '\'') => { + Some(c @ b'\'') => { self.finish_doctype_token_system_id(); self.append_raw_to_doctype_token(c); self.state = State::AfterDoctypeSystemIdentifier; @@ -4174,16 +4264,18 @@ where // This is an unexpected-null-character parse error. Append a U+FFFD // REPLACEMENT CHARACTER character to the current DOCTYPE token's system // identifier. - Some(c @ '\x00') => { + Some(c @ b'\x00') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::UnexpectedNullCharacter); - self.append_to_doctype_token(None, None, Some(REPLACEMENT_CHARACTER)); + let b = self.buf.clone(); + let mut buf = b.borrow_mut(); + buf.push(REPLACEMENT_CHARACTER); } // U+003E GREATER-THAN SIGN (>) // This is an abrupt-doctype-system-identifier parse error. 
Set the current // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.finish_doctype_token_system_id(); self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::AbruptDoctypeSystemIdentifier); @@ -4209,12 +4301,12 @@ where // identifier. Some(c) => { self.validate_input_stream_character(c); - self.consume_and_append_to_doctype_token_system_id(c, |c| { - if !is_allowed_character(c) { + self.consume_and_append_to_doctype_token_system_id(c, |ch| { + if !is_allowed_character(ch) { return false; } - !matches!(c, '\'' | '\x00' | '>' | '\r') + !matches!(ch, '\'' | '\x00' | '>' | '\r') }); } } @@ -4233,7 +4325,7 @@ where } // U+003E GREATER-THAN SIGN (>) // Switch to the data state. Emit the current DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.state = State::Data; self.emit_doctype_token(); @@ -4266,14 +4358,14 @@ where match self.consume_next_char() { // U+003E GREATER-THAN SIGN (>) // Switch to the data state. Emit the DOCTYPE token. - Some(c @ '>') => { + Some(c @ b'>') => { self.append_raw_to_doctype_token(c); self.state = State::Data; self.emit_doctype_token(); } // U+0000 NULL // This is an unexpected-null-character parse error. Ignore the character. - Some(c @ '\x00') => { + Some(c @ b'\x00') => { self.append_raw_to_doctype_token(c); self.emit_error(ErrorKind::UnexpectedNullCharacter); } @@ -4299,7 +4391,7 @@ where match self.consume_next_char() { // U+005D RIGHT SQUARE BRACKET (]) // Switch to the CDATA section bracket state. - Some(']') => { + Some(b']') => { self.state = State::CdataSectionBracket; } // EOF @@ -4324,14 +4416,14 @@ where match self.consume_next_char() { // U+005D RIGHT SQUARE BRACKET (]) // Switch to the CDATA section end state. - Some(']') => { + Some(b']') => { self.state = State::CdataSectionEnd; } // Anything else // Emit a U+005D RIGHT SQUARE BRACKET character token. 
Reconsume in the // CDATA section state. _ => { - self.emit_character_token(']'); + self.emit_character_token(b']'); self.reconsume_in_state(State::CdataSection); } } @@ -4342,20 +4434,20 @@ where match self.consume_next_char() { // U+005D RIGHT SQUARE BRACKET (]) // Emit a U+005D RIGHT SQUARE BRACKET character token. - Some(c @ ']') => { + Some(c @ b']') => { self.emit_character_token_with_raw(']', c); } // U+003E GREATER-THAN SIGN character // Switch to the data state. - Some('>') => { + Some(b'>') => { self.state = State::Data; } // Anything else // Emit two U+005D RIGHT SQUARE BRACKET character tokens. Reconsume in the // CDATA section state. _ => { - self.emit_character_token(']'); - self.emit_character_token(']'); + self.emit_character_token(b']'); + self.emit_character_token(b']'); self.reconsume_in_state(State::CdataSection); } } @@ -4377,8 +4469,8 @@ where // U+0023 NUMBER SIGN (#) // Append the current input character to the temporary buffer. Switch to the // numeric character reference state. - Some(c @ '#') => { - self.temporary_buffer.push(c); + Some(c @ b'#') => { + self.temporary_buffer.push(c as char); self.state = State::NumericCharacterReference; } // Anything else @@ -4409,7 +4501,7 @@ where // No need to validate input, because we reset position if nothing was found while let Some(c) = &self.consume_next_char() { - entity_temporary_buffer.push(*c); + entity_temporary_buffer.push(*c as char); if let Some(found_entity) = HTML_ENTITIES.get(&entity_temporary_buffer) { entity = Some(found_entity); @@ -4448,7 +4540,7 @@ where match entity { Some(entity) => { let is_next_equals_sign_or_ascii_alphanumeric = match self.next() { - Some('=') => true, + Some(b'=') => true, Some(c) if c.is_ascii_alphanumeric() => true, _ => false, }; @@ -4514,7 +4606,7 @@ where // Otherwise, emit the current input character as a character token. 
Some(c) if c.is_ascii_alphanumeric() => { if self.is_consumed_as_part_of_an_attribute() { - self.append_to_attribute_token_value(Some(c), Some(c)); + self.append_to_attribute_token_value(Some(c as char), Some(c as char)); } else { self.emit_character_token(c); } @@ -4522,7 +4614,7 @@ where // U+003B SEMICOLON (;) // This is an unknown-named-character-reference parse error. Reconsume in // the return state. - Some(';') => { + Some(b';') => { self.emit_error(ErrorKind::UnknownNamedCharacterReference); self.reconsume_in_state(self.return_state.clone()); } @@ -4543,8 +4635,8 @@ where // U+0058 LATIN CAPITAL LETTER X // Append the current input character to the temporary buffer. Switch to the // hexadecimal character reference start state. - Some(c @ 'x' | c @ 'X') => { - self.temporary_buffer.push(c); + Some(c @ b'x' | c @ b'X') => { + self.temporary_buffer.push(c as char); self.state = State::HexademicalCharacterReferenceStart; } // Anything else @@ -4604,7 +4696,7 @@ where // to the character reference code. Some(c) if c.is_ascii_digit() => match &mut self.character_reference_code { Some(character_reference_code) => { - character_reference_code.push((16, c as u32 - 0x30, Some(c))); + character_reference_code.push((16, c as u32 - 0x30, Some(c as char))); } _ => { unreachable!(); @@ -4616,7 +4708,7 @@ where // character's code point) to the character reference code. Some(c) if is_upper_hex_digit(c) => match &mut self.character_reference_code { Some(character_reference_code) => { - character_reference_code.push((16, c as u32 - 0x37, Some(c))); + character_reference_code.push((16, c as u32 - 0x37, Some(c as char))); } _ => { unreachable!(); @@ -4628,7 +4720,7 @@ where // character's code point) to the character reference code. 
Some(c) if is_lower_hex_digit(c) => match &mut self.character_reference_code { Some(character_reference_code) => { - character_reference_code.push((16, c as u32 - 0x57, Some(c))); + character_reference_code.push((16, c as u32 - 0x57, Some(c as char))); } _ => { unreachable!(); @@ -4636,7 +4728,7 @@ where }, // U+003B SEMICOLON // Switch to the numeric character reference end state. - Some(';') => { + Some(b';') => { self.state = State::NumericCharacterReferenceEnd; } // Anything else @@ -4658,7 +4750,7 @@ where // to the character reference code. Some(c) if c.is_ascii_digit() => match &mut self.character_reference_code { Some(character_reference_code) => { - character_reference_code.push((10, c as u32 - 0x30, Some(c))); + character_reference_code.push((10, c as u32 - 0x30, Some(c as char))); } _ => { unreachable!(); @@ -4666,7 +4758,7 @@ where }, // U+003B SEMICOLON // Switch to the numeric character reference end state. - Some(';') => self.state = State::NumericCharacterReferenceEnd, + Some(b';') => self.state = State::NumericCharacterReferenceEnd, // Anything else // This is a missing-semicolon-after-character-reference parse error. // Reconsume in the numeric character reference end state. 
@@ -4832,7 +4924,7 @@ where raw.push_str(&old_temporary_buffer); raw.push_str(&raw_char_ref); - if self.cur == Some(';') { + if self.cur == Some(b';') { raw.push(';'); } @@ -4855,12 +4947,10 @@ where } #[inline(always)] - fn skip_whitespaces(&mut self, c: char) { - if c == '\r' && self.input.cur() == Some('\n') { - unsafe { - // Safety: cur() is Some - self.input.bump(); - } + fn skip_whitespaces(&mut self, c: u8) { + if c == b'\r' && self.input.cur() == Some(b'\n') { + // ASCII newline is always 1 byte + self.input.bump_bytes(1); } } } @@ -4868,8 +4958,13 @@ where // By spec '\r` removed before tokenizer, but we keep them to have better AST // and don't break logic to ignore characters #[inline(always)] -fn is_spacy(c: char) -> bool { - matches!(c, '\x09' | '\x0a' | '\x0d' | '\x0c' | '\x20') +fn is_spacy(c: u8) -> bool { + matches!(c, b'\x09' | b'\x0a' | b'\x0d' | b'\x0c' | b'\x20') +} + +#[inline(always)] +fn is_spacy_char(c: char) -> bool { + c.is_ascii() && is_spacy(c as u8) +} #[inline(always)] @@ -4932,35 +5027,40 @@ fn is_noncharacter(c: u32) -> bool { } #[inline(always)] -fn is_upper_hex_digit(c: char) -> bool { - matches!(c, '0'..='9' | 'A'..='F') +fn is_upper_hex_digit(c: u8) -> bool { + matches!(c, b'0'..=b'9' | b'A'..=b'F') } #[inline(always)] -fn is_lower_hex_digit(c: char) -> bool { - matches!(c, '0'..='9' | 'a'..='f') +fn is_lower_hex_digit(c: u8) -> bool { + matches!(c, b'0'..=b'9' | b'a'..=b'f') } #[inline(always)] -fn is_ascii_hex_digit(c: char) -> bool { +fn is_ascii_hex_digit(c: u8) -> bool { is_upper_hex_digit(c) || is_lower_hex_digit(c) } #[inline(always)] -fn is_ascii_upper_alpha(c: char) -> bool { +fn is_ascii_upper_alpha(c: u8) -> bool { c.is_ascii_uppercase() } #[inline(always)] -fn is_ascii_lower_alpha(c: char) -> bool { +fn is_ascii_lower_alpha(c: u8) -> bool { c.is_ascii_lowercase() } #[inline(always)] -fn is_ascii_alpha(c: char) -> bool { +fn is_ascii_alpha(c: u8) -> bool { is_ascii_upper_alpha(c) || is_ascii_lower_alpha(c) } +#[inline(always)] +fn 
is_ascii_upper_alpha_char(c: char) -> bool { + c.is_ascii_uppercase() +} + #[inline(always)] fn is_allowed_control_character(c: u32) -> bool { c != 0x00 && is_control(c) @@ -4976,3 +5076,8 @@ fn is_allowed_character(c: char) -> bool { return true; } + +#[inline(always)] +fn is_non_ascii(c: u8) -> bool { + c >= 0x80 +} diff --git a/crates/swc_xml_parser/src/lexer/mod.rs b/crates/swc_xml_parser/src/lexer/mod.rs index 95b4c5057614..ebac6ad14726 100644 --- a/crates/swc_xml_parser/src/lexer/mod.rs +++ b/crates/swc_xml_parser/src/lexer/mod.rs @@ -173,9 +173,9 @@ where // A leading Byte Order Mark (BOM) causes the character encoding argument to be // ignored and will itself be skipped. - if lexer.input.is_at_start() && lexer.input.cur() == Some('\u{feff}') { + if lexer.input.is_at_start() && lexer.input.cur_as_char() == Some('\u{feff}') { unsafe { - // Safety: cur() is Some('\u{feff}') + // Safety: cur_as_char() is Some('\u{feff}') lexer.input.bump(); } } @@ -224,7 +224,7 @@ where { #[inline(always)] fn next(&mut self) -> Option { - self.input.cur() + self.input.cur_as_char() } // Any occurrences of surrogates are surrogate-in-input-stream parse errors. 
Any @@ -249,12 +249,12 @@ where #[inline(always)] fn consume(&mut self) { - self.cur = self.input.cur(); + self.cur = self.input.cur_as_char(); self.cur_pos = self.input.cur_pos(); if self.cur.is_some() { unsafe { - // Safety: cur() is Some(c) + // Safety: cur_as_char() is Some(c) self.input.bump(); } } @@ -573,9 +573,9 @@ where raw.push(c); - if self.input.cur() == Some('\n') { + if self.input.cur() == Some(b'\n') { unsafe { - // Safety: cur() is Some('\n') + // Safety: cur() is Some(b'\n') self.input.bump(); } @@ -895,9 +895,9 @@ where raw_c.push(c); - if self.input.cur() == Some('\n') { + if self.input.cur() == Some(b'\n') { unsafe { - // Safety: cur() is Some('\n') + // Safety: cur() is Some(b'\n') self.input.bump(); } @@ -962,9 +962,9 @@ where raw.push(c); - if self.input.cur() == Some('\n') { + if self.input.cur() == Some(b'\n') { unsafe { - // Safety: cur() is Some('\n') + // Safety: cur() is Some(b'\n') self.input.bump(); } @@ -3104,9 +3104,9 @@ where #[inline(always)] fn skip_next_lf(&mut self, c: char) { - if c == '\r' && self.input.cur() == Some('\n') { + if c == '\r' && self.input.cur() == Some(b'\n') { unsafe { - // Safety: cur() is Some('\n') + // Safety: cur() is Some(b'\n') self.input.bump(); } }