|  | 
|  | 1 | +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT | 
|  | 2 | +// file at the top-level directory of this distribution and at | 
|  | 3 | +// http://rust-lang.org/COPYRIGHT. | 
|  | 4 | +// | 
|  | 5 | +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | 
|  | 6 | +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | 
|  | 7 | +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | 
|  | 8 | +// option. This file may not be copied, modified, or distributed | 
|  | 9 | +// except according to those terms. | 
|  | 10 | +// | 
|  | 11 | +// Original implementation taken from rust-memchr | 
|  | 12 | +// Copyright 2015 Andrew Gallant, bluss and Nicolas Koch | 
|  | 13 | + | 
|  | 14 | +use cmp; | 
|  | 15 | +use mem; | 
|  | 16 | + | 
/// 64-bit pattern with the lowest bit of every byte set (`0x01` repeated).
const LO_U64: u64 = 0x0101_0101_0101_0101;
/// 64-bit pattern with the highest bit of every byte set (`0x80` repeated).
const HI_U64: u64 = 0x8080_8080_8080_8080;

// The `as` casts deliberately truncate the 64-bit patterns down to the
// native word width when `usize` is narrower than 64 bits.
const LO_USIZE: usize = LO_U64 as usize;
const HI_USIZE: usize = HI_U64 as usize;

/// Return `true` if at least one byte of the word `x` is zero.
///
/// Technique from *Matters Computational*, J. Arndt:
///
/// "The idea is to subtract one from each of the bytes and then look for
/// bytes where the borrow propagated all the way to the most significant
/// bit."
#[inline]
fn contains_zero_byte(x: usize) -> bool {
    // Subtracting 0x01 from every byte borrows into the top bit of a byte
    // that was zero.
    let borrowed = x.wrapping_sub(LO_USIZE);
    // Only keep top bits that were clear in the input, which discards bytes
    // that already were >= 0x80 before the subtraction.
    let originally_clear = !x;
    (borrowed & originally_clear & HI_USIZE) != 0
}
|  | 35 | + | 
/// Return a `usize` in which every byte is equal to `b`.
///
/// A single definition serves every pointer width, so no per-width `cfg`
/// variants are needed.
#[inline]
fn repeat_byte(b: u8) -> usize {
    // An all-ones word divided by 0xFF is the word with 0x01 in every byte,
    // whatever the size of `usize`.
    const ONE_IN_EACH_BYTE: usize = !0usize / 0xFF;
    // `b` is at most 0xFF, so each byte of the product is exactly `b`: no
    // carry crosses a byte boundary and the multiplication cannot overflow.
    (b as usize) * ONE_IN_EACH_BYTE
}
|  | 58 | + | 
|  | 59 | +/// Return the first index matching the byte `a` in `text`. | 
|  | 60 | +pub fn memchr(x: u8, text: &[u8]) -> Option<usize> { | 
|  | 61 | +    // Scan for a single byte value by reading two `usize` words at a time. | 
|  | 62 | +    // | 
|  | 63 | +    // Split `text` in three parts | 
|  | 64 | +    // - unaligned initial part, before the first word aligned address in text | 
|  | 65 | +    // - body, scan by 2 words at a time | 
|  | 66 | +    // - the last remaining part, < 2 word size | 
|  | 67 | +    let len = text.len(); | 
|  | 68 | +    let ptr = text.as_ptr(); | 
|  | 69 | +    let usize_bytes = mem::size_of::<usize>(); | 
|  | 70 | + | 
|  | 71 | +    // search up to an aligned boundary | 
|  | 72 | +    let mut offset = ptr.align_offset(usize_bytes); | 
|  | 73 | +    if offset > 0 { | 
|  | 74 | +        offset = cmp::min(offset, len); | 
|  | 75 | +        if let Some(index) = text[..offset].iter().position(|elt| *elt == x) { | 
|  | 76 | +            return Some(index); | 
|  | 77 | +        } | 
|  | 78 | +    } | 
|  | 79 | + | 
|  | 80 | +    // search the body of the text | 
|  | 81 | +    let repeated_x = repeat_byte(x); | 
|  | 82 | + | 
|  | 83 | +    if len >= 2 * usize_bytes { | 
|  | 84 | +        while offset <= len - 2 * usize_bytes { | 
|  | 85 | +            unsafe { | 
|  | 86 | +                let u = *(ptr.offset(offset as isize) as *const usize); | 
|  | 87 | +                let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize); | 
|  | 88 | + | 
|  | 89 | +                // break if there is a matching byte | 
|  | 90 | +                let zu = contains_zero_byte(u ^ repeated_x); | 
|  | 91 | +                let zv = contains_zero_byte(v ^ repeated_x); | 
|  | 92 | +                if zu || zv { | 
|  | 93 | +                    break; | 
|  | 94 | +                } | 
|  | 95 | +            } | 
|  | 96 | +            offset += usize_bytes * 2; | 
|  | 97 | +        } | 
|  | 98 | +    } | 
|  | 99 | + | 
|  | 100 | +    // find the byte after the point the body loop stopped | 
|  | 101 | +    text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i) | 
|  | 102 | +} | 
|  | 103 | + | 
|  | 104 | +/// Return the last index matching the byte `a` in `text`. | 
|  | 105 | +pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> { | 
|  | 106 | +    // Scan for a single byte value by reading two `usize` words at a time. | 
|  | 107 | +    // | 
|  | 108 | +    // Split `text` in three parts | 
|  | 109 | +    // - unaligned tail, after the last word aligned address in text | 
|  | 110 | +    // - body, scan by 2 words at a time | 
|  | 111 | +    // - the first remaining bytes, < 2 word size | 
|  | 112 | +    let len = text.len(); | 
|  | 113 | +    let ptr = text.as_ptr(); | 
|  | 114 | +    let usize_bytes = mem::size_of::<usize>(); | 
|  | 115 | + | 
|  | 116 | +    // search to an aligned boundary | 
|  | 117 | +    let end_align = (ptr as usize + len) & (usize_bytes - 1); | 
|  | 118 | +    let mut offset; | 
|  | 119 | +    if end_align > 0 { | 
|  | 120 | +        offset = if end_align >= len { 0 } else { len - end_align }; | 
|  | 121 | +        if let Some(index) = text[offset..].iter().rposition(|elt| *elt == x) { | 
|  | 122 | +            return Some(offset + index); | 
|  | 123 | +        } | 
|  | 124 | +    } else { | 
|  | 125 | +        offset = len; | 
|  | 126 | +    } | 
|  | 127 | + | 
|  | 128 | +    // search the body of the text | 
|  | 129 | +    let repeated_x = repeat_byte(x); | 
|  | 130 | + | 
|  | 131 | +    while offset >= 2 * usize_bytes { | 
|  | 132 | +        unsafe { | 
|  | 133 | +            let u = *(ptr.offset(offset as isize - 2 * usize_bytes as isize) as *const usize); | 
|  | 134 | +            let v = *(ptr.offset(offset as isize - usize_bytes as isize) as *const usize); | 
|  | 135 | + | 
|  | 136 | +            // break if there is a matching byte | 
|  | 137 | +            let zu = contains_zero_byte(u ^ repeated_x); | 
|  | 138 | +            let zv = contains_zero_byte(v ^ repeated_x); | 
|  | 139 | +            if zu || zv { | 
|  | 140 | +                break; | 
|  | 141 | +            } | 
|  | 142 | +        } | 
|  | 143 | +        offset -= 2 * usize_bytes; | 
|  | 144 | +    } | 
|  | 145 | + | 
|  | 146 | +    // find the byte before the point the body loop stopped | 
|  | 147 | +    text[..offset].iter().rposition(|elt| *elt == x) | 
|  | 148 | +} | 
|  | 149 | + | 
|  | 150 | +// test fallback implementations on all platforms | 
#[test]
fn matches_one() {
    // A one-byte haystack that is the needle itself matches at index 0.
    let haystack = b"a";
    let found = memchr(b'a', haystack);
    assert_eq!(Some(0), found);
}
|  | 155 | + | 
#[test]
fn matches_begin() {
    // With several candidates, the forward search reports the first one.
    let haystack = b"aaaa";
    let found = memchr(b'a', haystack);
    assert_eq!(Some(0), found);
}
|  | 160 | + | 
#[test]
fn matches_end() {
    // The needle is the very last byte of the haystack.
    let haystack = b"aaaaz";
    let found = memchr(b'z', haystack);
    assert_eq!(Some(4), found);
}
|  | 165 | + | 
#[test]
fn matches_nul() {
    // A NUL byte is an ordinary needle.
    let haystack = b"aaaa\x00";
    let found = memchr(b'\x00', haystack);
    assert_eq!(Some(4), found);
}
|  | 170 | + | 
#[test]
fn matches_past_nul() {
    // The search does not stop at an embedded NUL byte.
    let haystack = b"aaaa\x00z";
    let found = memchr(b'z', haystack);
    assert_eq!(Some(5), found);
}
|  | 175 | + | 
#[test]
fn no_match_empty() {
    // An empty haystack never contains the needle.
    let haystack = b"";
    let found = memchr(b'a', haystack);
    assert_eq!(None, found);
}
|  | 180 | + | 
#[test]
fn no_match() {
    // The needle is absent from a non-empty haystack.
    let haystack = b"xyz";
    let found = memchr(b'a', haystack);
    assert_eq!(None, found);
}
|  | 185 | + | 
#[test]
fn matches_one_reversed() {
    // A one-byte haystack that is the needle itself matches at index 0.
    let haystack = b"a";
    let found = memrchr(b'a', haystack);
    assert_eq!(Some(0), found);
}
|  | 190 | + | 
#[test]
fn matches_begin_reversed() {
    // With several candidates, the reverse search reports the last one.
    let haystack = b"aaaa";
    let found = memrchr(b'a', haystack);
    assert_eq!(Some(3), found);
}
|  | 195 | + | 
#[test]
fn matches_end_reversed() {
    // The only occurrence is the first byte, the far end for a reverse scan.
    let haystack = b"zaaaa";
    let found = memrchr(b'z', haystack);
    assert_eq!(Some(0), found);
}
|  | 200 | + | 
#[test]
fn matches_nul_reversed() {
    // A NUL byte is an ordinary needle for the reverse search too.
    let haystack = b"aaaa\x00";
    let found = memrchr(b'\x00', haystack);
    assert_eq!(Some(4), found);
}
|  | 205 | + | 
#[test]
fn matches_past_nul_reversed() {
    // The reverse search continues past an embedded NUL byte.
    let haystack = b"z\x00aaaa";
    let found = memrchr(b'z', haystack);
    assert_eq!(Some(0), found);
}
|  | 210 | + | 
#[test]
fn no_match_empty_reversed() {
    // An empty haystack never contains the needle.
    let haystack = b"";
    let found = memrchr(b'a', haystack);
    assert_eq!(None, found);
}
|  | 215 | + | 
#[test]
fn no_match_reversed() {
    // The needle is absent from a non-empty haystack.
    let haystack = b"xyz";
    let found = memrchr(b'a', haystack);
    assert_eq!(None, found);
}
|  | 220 | + | 
#[test]
fn each_alignment_reversed() {
    // Slide the start of the haystack over 16 consecutive addresses so the
    // reverse search is exercised with every possible starting alignment.
    const NEEDLE: u8 = 2;
    const POS: usize = 40;

    let mut data = [1u8; 64];
    data[POS] = NEEDLE;

    for start in 0..16 {
        let haystack = &data[start..];
        // Dropping `start` leading bytes shifts the expected index down.
        assert_eq!(Some(POS - start), memrchr(NEEDLE, haystack));
    }
}
0 commit comments