@@ -714,6 +714,156 @@ impl String {
714714 . collect ( )
715715 }
716716
717+ /// Decode a UTF-16LE–encoded vector `v` into a `String`, returning [`Err`]
718+ /// if `v` contains any invalid data.
719+ ///
720+ /// # Examples
721+ ///
722+ /// Basic usage:
723+ ///
724+ /// ```
725+ /// #![feature(str_from_utf16_endian)]
726+ /// // 𝄞music
727+ /// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
728+ /// 0x73, 0x00, 0x69, 0x00, 0x63, 0x00];
729+ /// assert_eq!(String::from("𝄞music"),
730+ /// String::from_utf16le(v).unwrap());
731+ ///
732+ /// // 𝄞mu<invalid>ic
733+ /// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
734+ /// 0x00, 0xD8, 0x69, 0x00, 0x63, 0x00];
735+ /// assert!(String::from_utf16le(v).is_err());
736+ /// ```
737+ #[ cfg( not( no_global_oom_handling) ) ]
738+ #[ unstable( feature = "str_from_utf16_endian" , issue = "116258" ) ]
739+ pub fn from_utf16le ( v : & [ u8 ] ) -> Result < String , FromUtf16Error > {
740+ if v. len ( ) % 2 != 0 {
741+ return Err ( FromUtf16Error ( ( ) ) ) ;
742+ }
743+ match ( cfg ! ( target_endian = "little" ) , unsafe { v. align_to :: < u16 > ( ) } ) {
744+ ( true , ( [ ] , v, [ ] ) ) => Self :: from_utf16 ( v) ,
745+ _ => char:: decode_utf16 ( v. array_chunks :: < 2 > ( ) . copied ( ) . map ( u16:: from_le_bytes) )
746+ . collect :: < Result < _ , _ > > ( )
747+ . map_err ( |_| FromUtf16Error ( ( ) ) ) ,
748+ }
749+ }
750+
751+ /// Decode a UTF-16LE–encoded slice `v` into a `String`, replacing
752+ /// invalid data with [the replacement character (`U+FFFD`)][U+FFFD].
753+ ///
754+ /// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`],
755+ /// `from_utf16le_lossy` returns a `String` since the UTF-16 to UTF-8
756+ /// conversion requires a memory allocation.
757+ ///
758+ /// [`from_utf8_lossy`]: String::from_utf8_lossy
759+ /// [`Cow<'a, str>`]: crate::borrow::Cow "borrow::Cow"
760+ /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER
761+ ///
762+ /// # Examples
763+ ///
764+ /// Basic usage:
765+ ///
766+ /// ```
767+ /// #![feature(str_from_utf16_endian)]
768+ /// // 𝄞mus<invalid>ic<invalid>
769+ /// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
770+ /// 0x73, 0x00, 0x1E, 0xDD, 0x69, 0x00, 0x63, 0x00,
771+ /// 0x34, 0xD8];
772+ ///
773+ /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
774+ /// String::from_utf16le_lossy(v));
775+ /// ```
776+ #[ cfg( not( no_global_oom_handling) ) ]
777+ #[ unstable( feature = "str_from_utf16_endian" , issue = "116258" ) ]
778+ pub fn from_utf16le_lossy ( v : & [ u8 ] ) -> String {
779+ match ( cfg ! ( target_endian = "little" ) , unsafe { v. align_to :: < u16 > ( ) } ) {
780+ ( true , ( [ ] , v, [ ] ) ) => Self :: from_utf16_lossy ( v) ,
781+ ( true , ( [ ] , v, [ _remainder] ) ) => Self :: from_utf16_lossy ( v) + "\u{FFFD} " ,
782+ _ => {
783+ let mut iter = v. array_chunks :: < 2 > ( ) ;
784+ let string = char:: decode_utf16 ( iter. by_ref ( ) . copied ( ) . map ( u16:: from_le_bytes) )
785+ . map ( |r| r. unwrap_or ( char:: REPLACEMENT_CHARACTER ) )
786+ . collect ( ) ;
787+ if iter. remainder ( ) . is_empty ( ) { string } else { string + "\u{FFFD} " }
788+ }
789+ }
790+ }
791+
792+ /// Decode a UTF-16BE–encoded vector `v` into a `String`, returning [`Err`]
793+ /// if `v` contains any invalid data.
794+ ///
795+ /// # Examples
796+ ///
797+ /// Basic usage:
798+ ///
799+ /// ```
800+ /// #![feature(str_from_utf16_endian)]
801+ /// // 𝄞music
802+ /// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
803+ /// 0x00, 0x73, 0x00, 0x69, 0x00, 0x63];
804+ /// assert_eq!(String::from("𝄞music"),
805+ /// String::from_utf16be(v).unwrap());
806+ ///
807+ /// // 𝄞mu<invalid>ic
808+ /// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
809+ /// 0xD8, 0x00, 0x00, 0x69, 0x00, 0x63];
810+ /// assert!(String::from_utf16be(v).is_err());
811+ /// ```
812+ #[ cfg( not( no_global_oom_handling) ) ]
813+ #[ unstable( feature = "str_from_utf16_endian" , issue = "116258" ) ]
814+ pub fn from_utf16be ( v : & [ u8 ] ) -> Result < String , FromUtf16Error > {
815+ if v. len ( ) % 2 != 0 {
816+ return Err ( FromUtf16Error ( ( ) ) ) ;
817+ }
818+ match ( cfg ! ( target_endian = "big" ) , unsafe { v. align_to :: < u16 > ( ) } ) {
819+ ( true , ( [ ] , v, [ ] ) ) => Self :: from_utf16 ( v) ,
820+ _ => char:: decode_utf16 ( v. array_chunks :: < 2 > ( ) . copied ( ) . map ( u16:: from_be_bytes) )
821+ . collect :: < Result < _ , _ > > ( )
822+ . map_err ( |_| FromUtf16Error ( ( ) ) ) ,
823+ }
824+ }
825+
826+ /// Decode a UTF-16BE–encoded slice `v` into a `String`, replacing
827+ /// invalid data with [the replacement character (`U+FFFD`)][U+FFFD].
828+ ///
829+ /// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`],
830+ /// `from_utf16le_lossy` returns a `String` since the UTF-16 to UTF-8
831+ /// conversion requires a memory allocation.
832+ ///
833+ /// [`from_utf8_lossy`]: String::from_utf8_lossy
834+ /// [`Cow<'a, str>`]: crate::borrow::Cow "borrow::Cow"
835+ /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER
836+ ///
837+ /// # Examples
838+ ///
839+ /// Basic usage:
840+ ///
841+ /// ```
842+ /// #![feature(str_from_utf16_endian)]
843+ /// // 𝄞mus<invalid>ic<invalid>
844+ /// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
845+ /// 0x00, 0x73, 0xDD, 0x1E, 0x00, 0x69, 0x00, 0x63,
846+ /// 0xD8, 0x34];
847+ ///
848+ /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
849+ /// String::from_utf16be_lossy(v));
850+ /// ```
851+ #[ cfg( not( no_global_oom_handling) ) ]
852+ #[ unstable( feature = "str_from_utf16_endian" , issue = "116258" ) ]
853+ pub fn from_utf16be_lossy ( v : & [ u8 ] ) -> String {
854+ match ( cfg ! ( target_endian = "big" ) , unsafe { v. align_to :: < u16 > ( ) } ) {
855+ ( true , ( [ ] , v, [ ] ) ) => Self :: from_utf16_lossy ( v) ,
856+ ( true , ( [ ] , v, [ _remainder] ) ) => Self :: from_utf16_lossy ( v) + "\u{FFFD} " ,
857+ _ => {
858+ let mut iter = v. array_chunks :: < 2 > ( ) ;
859+ let string = char:: decode_utf16 ( iter. by_ref ( ) . copied ( ) . map ( u16:: from_be_bytes) )
860+ . map ( |r| r. unwrap_or ( char:: REPLACEMENT_CHARACTER ) )
861+ . collect ( ) ;
862+ if iter. remainder ( ) . is_empty ( ) { string } else { string + "\u{FFFD} " }
863+ }
864+ }
865+ }
866+
717867 /// Decomposes a `String` into its raw components.
718868 ///
719869 /// Returns the raw pointer to the underlying data, the length of
0 commit comments