@@ -116,9 +116,9 @@ impl char {
116116
117117 // the code is split up here to improve execution speed for cases where
118118 // the `radix` is constant and 10 or smaller
119- let val = if radix <= 10 {
119+ let val = if radix <= 10 {
120120 match self {
121- '0' ..= '9' => self as u32 - '0' as u32 ,
121+ '0' ..='9' => self as u32 - '0' as u32 ,
122122 _ => return None ,
123123 }
124124 } else {
@@ -130,8 +130,11 @@ impl char {
130130 }
131131 } ;
132132
133- if val < radix { Some ( val) }
134- else { None }
133+ if val < radix {
134+ Some ( val)
135+ } else {
136+ None
137+ }
135138 }
136139
137140 /// Returns an iterator that yields the hexadecimal Unicode escape of a
@@ -303,8 +306,8 @@ impl char {
303306 '\r' => EscapeDefaultState :: Backslash ( 'r' ) ,
304307 '\n' => EscapeDefaultState :: Backslash ( 'n' ) ,
305308 '\\' | '\'' | '"' => EscapeDefaultState :: Backslash ( self ) ,
306- '\x20' ..= '\x7e' => EscapeDefaultState :: Char ( self ) ,
307- _ => EscapeDefaultState :: Unicode ( self . escape_unicode ( ) )
309+ '\x20' ..='\x7e' => EscapeDefaultState :: Char ( self ) ,
310+ _ => EscapeDefaultState :: Unicode ( self . escape_unicode ( ) ) ,
308311 } ;
309312 EscapeDefault { state : init_state }
310313 }
@@ -436,30 +439,31 @@ impl char {
436439 pub fn encode_utf8 ( self , dst : & mut [ u8 ] ) -> & mut str {
437440 let code = self as u32 ;
438441 unsafe {
439- let len =
440- if code < MAX_ONE_B && !dst. is_empty ( ) {
442+ let len = if code < MAX_ONE_B && !dst. is_empty ( ) {
441443 * dst. get_unchecked_mut ( 0 ) = code as u8 ;
442444 1
443445 } else if code < MAX_TWO_B && dst. len ( ) >= 2 {
444446 * dst. get_unchecked_mut ( 0 ) = ( code >> 6 & 0x1F ) as u8 | TAG_TWO_B ;
445447 * dst. get_unchecked_mut ( 1 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
446448 2
447- } else if code < MAX_THREE_B && dst. len ( ) >= 3 {
449+ } else if code < MAX_THREE_B && dst. len ( ) >= 3 {
448450 * dst. get_unchecked_mut ( 0 ) = ( code >> 12 & 0x0F ) as u8 | TAG_THREE_B ;
449- * dst. get_unchecked_mut ( 1 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
451+ * dst. get_unchecked_mut ( 1 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
450452 * dst. get_unchecked_mut ( 2 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
451453 3
452454 } else if dst. len ( ) >= 4 {
453455 * dst. get_unchecked_mut ( 0 ) = ( code >> 18 & 0x07 ) as u8 | TAG_FOUR_B ;
454456 * dst. get_unchecked_mut ( 1 ) = ( code >> 12 & 0x3F ) as u8 | TAG_CONT ;
455- * dst. get_unchecked_mut ( 2 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
457+ * dst. get_unchecked_mut ( 2 ) = ( code >> 6 & 0x3F ) as u8 | TAG_CONT ;
456458 * dst. get_unchecked_mut ( 3 ) = ( code & 0x3F ) as u8 | TAG_CONT ;
457459 4
458460 } else {
459- panic ! ( "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
461+ panic ! (
462+ "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}" ,
460463 from_u32_unchecked( code) . len_utf8( ) ,
461464 code,
462- dst. len( ) )
465+ dst. len( ) ,
466+ )
463467 } ;
464468 from_utf8_unchecked_mut ( dst. get_unchecked_mut ( ..len) )
465469 }
@@ -515,15 +519,24 @@ impl char {
515519 * dst. get_unchecked_mut ( 1 ) = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
516520 slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 2 )
517521 } else {
518- panic ! ( "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
522+ panic ! (
523+ "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
519524 from_u32_unchecked( code) . len_utf16( ) ,
520525 code,
521- dst. len( ) )
526+ dst. len( ) ,
527+ )
522528 }
523529 }
524530 }
525531
526- /// Returns `true` if this `char` is an alphabetic code point, and false if not.
532+ /// Returns `true` if this `char` has the `Alphabetic` property.
533+ ///
534+ /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
535+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
536+ ///
537+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
538+ /// [ucd]: https://www.unicode.org/reports/tr44/
539+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
527540 ///
528541 /// # Examples
529542 ///
@@ -547,10 +560,14 @@ impl char {
547560 }
548561 }
549562
550- /// Returns `true` if this `char` is lowercase.
563+ /// Returns `true` if this `char` has the `Lowercase` property.
551564 ///
552- /// 'Lowercase' is defined according to the terms of the Unicode Derived Core
553- /// Property `Lowercase`.
565+ /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
566+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
567+ ///
568+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
569+ /// [ucd]: https://www.unicode.org/reports/tr44/
570+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
554571 ///
555572 /// # Examples
556573 ///
@@ -575,10 +592,14 @@ impl char {
575592 }
576593 }
577594
578- /// Returns `true` if this `char` is uppercase.
595+ /// Returns `true` if this `char` has the `Uppercase` property.
596+ ///
597+ /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
598+ /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
579599 ///
580- /// 'Uppercase' is defined according to the terms of the Unicode Derived Core
581- /// Property `Uppercase`.
600+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
601+ /// [ucd]: https://www.unicode.org/reports/tr44/
602+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
582603 ///
583604 /// # Examples
584605 ///
@@ -603,10 +624,12 @@ impl char {
603624 }
604625 }
605626
606- /// Returns `true` if this `char` is whitespace.
627+ /// Returns `true` if this `char` has the `White_Space` property.
607628 ///
608- /// 'Whitespace' is defined according to the terms of the Unicode Derived Core
609- /// Property `White_Space`.
629+ /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
630+ ///
631+ /// [ucd]: https://www.unicode.org/reports/tr44/
632+ /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
610633 ///
611634 /// # Examples
612635 ///
@@ -630,10 +653,10 @@ impl char {
630653 }
631654 }
632655
633- /// Returns `true` if this `char` is alphanumeric.
656+ /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
634657 ///
635- /// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories
636- /// `Nd`, `Nl`, `No` and the Derived Core Property `Alphabetic`.
658+ /// [`is_alphabetic()`]: #method.is_alphabetic
659+ /// [`is_numeric()`]: #method.is_numeric
637660 ///
638661 /// # Examples
639662 ///
@@ -655,10 +678,15 @@ impl char {
655678 self . is_alphabetic ( ) || self . is_numeric ( )
656679 }
657680
658- /// Returns `true` if this `char` is a control code point.
681+ /// Returns `true` if this `char` has the general category for control codes.
682+ ///
683+ /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
684+ /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
685+ /// Database][ucd] [`UnicodeData.txt`].
659686 ///
660- /// 'Control code point' is defined in terms of the Unicode General
661- /// Category `Cc`.
687+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
688+ /// [ucd]: https://www.unicode.org/reports/tr44/
689+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
662690 ///
663691 /// # Examples
664692 ///
@@ -675,19 +703,29 @@ impl char {
675703 general_category:: Cc ( self )
676704 }
677705
678- /// Returns `true` if this `char` is an extended grapheme character.
706+ /// Returns `true` if this `char` has the `Grapheme_Extend` property.
679707 ///
680- /// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering
681- /// Category `Grapheme_Extend`.
708+ /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
709+ /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
710+ /// [`DerivedCoreProperties.txt`].
711+ ///
712+ /// [uax29]: https://www.unicode.org/reports/tr29/
713+ /// [ucd]: https://www.unicode.org/reports/tr44/
714+ /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
682715 #[ inline]
683716 pub ( crate ) fn is_grapheme_extended ( self ) -> bool {
684717 derived_property:: Grapheme_Extend ( self )
685718 }
686719
687- /// Returns `true` if this `char` is numeric.
720+ /// Returns `true` if this `char` has one of the general categories for numbers.
721+ ///
722+ /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
723+ /// characters, and `No` for other numeric characters) are specified in the [Unicode Character
724+ /// Database][ucd] [`UnicodeData.txt`].
688725 ///
689- /// 'Numeric'-ness is defined in terms of the Unicode General Categories
690- /// `Nd`, `Nl`, `No`.
726+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
727+ /// [ucd]: https://www.unicode.org/reports/tr44/
728+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
691729 ///
692730 /// # Examples
693731 ///
@@ -713,25 +751,29 @@ impl char {
713751 }
714752 }
715753
716- /// Returns an iterator that yields the lowercase equivalent of a `char`
717- /// as one or more `char`s.
754+ /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
755+ /// `char`s.
718756 ///
719- /// If a character does not have a lowercase equivalent, the same character
720- /// will be returned back by the iterator.
757+ /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
721758 ///
722- /// This performs complex unconditional mappings with no tailoring: it maps
723- /// one Unicode character to its lowercase equivalent according to the
724- /// [Unicode database] and the additional complex mappings
725- /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
726- /// language) are not considered here.
759+ /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
760+ /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
727761 ///
728- /// For a full reference, see [here][reference].
762+ /// [ucd]: https://www.unicode.org/reports/tr44/
763+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
729764 ///
730- /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
765+ /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
766+ /// the `char`(s) given by [`SpecialCasing.txt`].
731767 ///
732- /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
768+ /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
733769 ///
734- /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
770+ /// This operation performs an unconditional mapping without tailoring. That is, the conversion
771+ /// is independent of context and language.
772+ ///
773+ /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
774+ /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
775+ ///
776+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
735777 ///
736778 /// # Examples
737779 ///
@@ -774,25 +816,29 @@ impl char {
774816 ToLowercase ( CaseMappingIter :: new ( conversions:: to_lower ( self ) ) )
775817 }
776818
777- /// Returns an iterator that yields the uppercase equivalent of a `char`
778- /// as one or more `char`s.
819+ /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
820+ /// `char`s.
821+ ///
822+ /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
823+ ///
824+ /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
825+ /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
779826 ///
780- /// If a character does not have an uppercase equivalent, the same character
781- /// will be returned back by the iterator.
827+ /// [ucd]: https://www.unicode.org/reports/tr44/
828+ /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
782829 ///
783- /// This performs complex unconditional mappings with no tailoring: it maps
784- /// one Unicode character to its uppercase equivalent according to the
785- /// [Unicode database] and the additional complex mappings
786- /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
787- /// language) are not considered here.
830+ /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
831+ /// the `char`(s) given by [`SpecialCasing.txt`].
788832 ///
789- /// For a full reference, see [here][reference].
833+ /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
790834 ///
791- /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
835+ /// This operation performs an unconditional mapping without tailoring. That is, the conversion
836+ /// is independent of context and language.
792837 ///
793- /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
838+ /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
839+ /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
794840 ///
795- /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
841+ /// [Unicode Standard]: https://www.unicode.org/versions/latest/
796842 ///
797843 /// # Examples
798844 ///
0 commit comments