@@ -548,8 +548,8 @@ impl Parser {
548
548
'[' => match self . maybe_parse_ascii ( ) {
549
549
Some ( class2) => class. ranges . extend ( class2) ,
550
550
None => {
551
- self . bump ( ) ;
552
- try! ( self . parse_class_range ( & mut class , '[' ) )
551
+ return Err ( self . err (
552
+ ErrorKind :: UnsupportedClassChar ( '[' ) ) ) ;
553
553
}
554
554
} ,
555
555
'\\' => match try!( self . parse_escape ( ) ) {
@@ -582,6 +582,16 @@ impl Parser {
582
582
let _ = try!( self . codepoint_to_one_byte ( start) ) ;
583
583
}
584
584
self . bump ( ) ;
585
+ match start {
586
+ '&' |'~' |'-' => {
587
+ // Only report an error if we see && or ~~ or --.
588
+ if self . peek_is ( start) {
589
+ return Err ( self . err (
590
+ ErrorKind :: UnsupportedClassChar ( start) ) ) ;
591
+ }
592
+ }
593
+ _ => { }
594
+ }
585
595
try!( self . parse_class_range ( & mut class, start) ) ;
586
596
}
587
597
}
@@ -654,8 +664,11 @@ impl Parser {
654
664
// Because `parse_escape` can never return `LeftParen`.
655
665
_ => unreachable ! ( ) ,
656
666
} ,
657
- _ => {
658
- let c = self . bump ( ) ;
667
+ c => {
668
+ self . bump ( ) ;
669
+ if c == '-' {
670
+ return Err ( self . err ( ErrorKind :: UnsupportedClassChar ( '-' ) ) ) ;
671
+ }
659
672
if !self . flags . unicode {
660
673
let _ = try!( self . codepoint_to_one_byte ( c) ) ;
661
674
}
@@ -1212,7 +1225,7 @@ fn is_valid_capture_char(c: char) -> bool {
1212
1225
/// Returns true if `c` is one of the characters treated as regex punctuation.
///
/// The recognised characters are:
/// `\ . + * ? ( ) | [ ] { } ^ $ # & - ~`
///
/// `&`, `-` and `~` are included alongside the classic meta characters.
/// NOTE(review): presumably this is what allows `\&`, `\-` and `\~` to be
/// written as escapes inside character classes — confirm against the escape
/// parser, which is not visible here.
pub fn is_punct(c: char) -> bool {
    match c {
        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
        '[' | ']' | '{' | '}' | '^' | '$' | '#' | '&' | '-' | '~' => true,
        _ => false,
    }
}
@@ -2191,9 +2204,9 @@ mod tests {
2191
2204
2192
2205
#[ test]
2193
2206
fn class_brackets ( ) {
2194
- assert_eq ! ( p( "[]]" ) , Expr :: Class ( class( & [ ( ']' , ']' ) ] ) ) ) ;
2195
- assert_eq ! ( p( "[][]" ) , Expr :: Class ( class( & [ ( '[' , '[' ) , ( ']' , ']' ) ] ) ) ) ;
2196
- assert_eq ! ( p( "[ []]") , Expr :: Concat ( vec![
2207
+ assert_eq ! ( p( r "[]]") , Expr :: Class ( class( & [ ( ']' , ']' ) ] ) ) ) ;
2208
+ assert_eq ! ( p( r "[]\ []") , Expr :: Class ( class( & [ ( '[' , '[' ) , ( ']' , ']' ) ] ) ) ) ;
2209
+ assert_eq ! ( p( r"[\ []]") , Expr :: Concat ( vec![
2197
2210
Expr :: Class ( class( & [ ( '[' , '[' ) ] ) ) ,
2198
2211
lit( ']' ) ,
2199
2212
] ) ) ;
@@ -2208,6 +2221,31 @@ mod tests {
2208
2221
] ) ) ;
2209
2222
}
2210
2223
2224
#[test]
fn class_special_escaped_set_chars() {
    // These tests pin how the characters reserved for set notation behave
    // inside character classes: `[` must be escaped, while `&`, `~` and `-`
    // are accepted both escaped and as single unescaped literals or range
    // endpoints. The intention is to use these characters to implement
    // sets as described in UTS#18 RL1.3. Once that's done, these tests
    // should be removed and replaced with others.
    assert_eq!(p(r"[\[]"), Expr::Class(class(&[('[', '[')])));

    assert_eq!(p(r"[&]"), Expr::Class(class(&[('&', '&')])));
    assert_eq!(p(r"[\&]"), Expr::Class(class(&[('&', '&')])));
    assert_eq!(p(r"[\&\&]"), Expr::Class(class(&[('&', '&')])));
    assert_eq!(p(r"[\x00-&]"), Expr::Class(class(&[('\u{0}', '&')])));
    assert_eq!(p(r"[&-\xFF]"), Expr::Class(class(&[('&', '\u{FF}')])));

    assert_eq!(p(r"[~]"), Expr::Class(class(&[('~', '~')])));
    assert_eq!(p(r"[\~]"), Expr::Class(class(&[('~', '~')])));
    assert_eq!(p(r"[\~\~]"), Expr::Class(class(&[('~', '~')])));
    assert_eq!(p(r"[\x00-~]"), Expr::Class(class(&[('\u{0}', '~')])));
    assert_eq!(p(r"[~-\xFF]"), Expr::Class(class(&[('~', '\u{FF}')])));

    // An escaped `-` may terminate or start a range.
    assert_eq!(p(r"[+-\-]"), Expr::Class(class(&[('+', '-')])));
    assert_eq!(p(r"[a-a\--\xFF]"), Expr::Class(class(&[
        ('-', '\u{FF}'),
    ])));
}
2248
+
2211
2249
#[ test]
2212
2250
fn class_overlapping ( ) {
2213
2251
assert_eq ! ( p( "[a-fd-h]" ) , Expr :: Class ( class( & [ ( 'a' , 'h' ) ] ) ) ) ;
@@ -2759,6 +2797,19 @@ mod tests {
2759
2797
test_err ! ( r"(?-u)[^\x00-\xFF]" , 17 , ErrorKind :: EmptyClass , flags) ;
2760
2798
}
2761
2799
2800
#[test]
fn error_class_unsupported_char() {
    // These tests ensure that some unescaped special characters are
    // rejected in character classes: a bare `[`, and the doubled forms
    // `&&`, `~~` and `--`. The intention is to use these characters to
    // implement sets as described in UTS#18 RL1.3. Once that's done, these
    // tests should be removed and replaced with others.
    test_err!("[[]", 1, ErrorKind::UnsupportedClassChar('['));
    test_err!("[&&]", 2, ErrorKind::UnsupportedClassChar('&'));
    test_err!("[~~]", 2, ErrorKind::UnsupportedClassChar('~'));
    test_err!("[+--]", 4, ErrorKind::UnsupportedClassChar('-'));
    test_err!(r"[a-a--\xFF]", 5, ErrorKind::UnsupportedClassChar('-'));
}
2812
+
2762
2813
#[ test]
2763
2814
fn error_duplicate_capture_name ( ) {
2764
2815
test_err ! ( "(?P<a>.)(?P<a>.)" , 14 ,
0 commit comments