Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/comp/syntax/codemap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ fn span_to_lines(sp: span, cm: codemap::codemap) -> @file_lines {

fn get_line(fm: filemap, line: int) -> str unsafe {
let begin: uint = fm.lines[line].byte - fm.start_pos.byte;
let end = alt str::byte_index(*fm.src, '\n' as u8, begin) {
let end = alt str::byte_index_from(*fm.src, '\n' as u8, begin,
str::len(*fm.src)) {
some(e) { e }
none { str::len(*fm.src) }
};
Expand Down
140 changes: 94 additions & 46 deletions src/libcore/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ for correctness, but some UTF-8 unsafe functions are also provided.
For some heavy-duty uses, we recommend trying std::rope.
*/

import option::{some, none};

export
// Creating a string
from_bytes,
Expand Down Expand Up @@ -69,9 +71,11 @@ export
// Searching
index,
byte_index,
byte_index_from,
rindex,
find,
find_bytes,
find_from_bytes,
contains,
starts_with,
ends_with,
Expand Down Expand Up @@ -665,8 +669,8 @@ fn replace(s: str, from: str, to: str) : is_not_empty(from) -> str unsafe {
} else {
let idx;
alt find_bytes(s, from) {
option::some(x) { idx = x; }
option::none { ret s; }
some(x) { idx = x; }
none { ret s; }
}
let before = unsafe::slice_bytes(s, 0u, idx as uint);
let after = unsafe::slice_bytes(s, idx as uint + len_bytes(from),
Expand Down Expand Up @@ -842,28 +846,34 @@ fn index(ss: str, cc: char) -> option<uint> {

// found here?
if ch == cc {
ret option::some(cii);
ret some(cii);
}

cii += 1u;
bii = next;
}

// wasn't found
ret option::none;
ret none;
}

// Function: byte_index
//
// Returns the index of the first matching byte
// (as option some/none)
fn byte_index(s: str, b: u8, start: uint) -> option<uint> {
let i = start, l = len_bytes(s);
while i < l {
if s[i] == b { ret some(i); }
i += 1u;
}
ret none;
fn byte_index(s: str, b: u8) -> option<uint> {
byte_index_from(s, b, 0u, len_bytes(s))
}

// Function: byte_index_from
//
// Returns the index of the first byte equal to `b` within the byte range
// [`start`, `end`) of `s` (`start` inclusive, `end` exclusive).
// (as option some/none; none when no byte in the range matches)
//
// Fails if `end` is greater than the byte length of `s`.
fn byte_index_from(s: str, b: u8, start: uint, end: uint) -> option<uint> {
// Only the upper bound is checked here; `start` is passed through to
// vec::position_from as-is.
// NOTE(review): presumably vec::position_from validates `start <= end`
// itself -- confirm.
assert end <= len_bytes(s);

// Search the string's bytes for the first one equal to `b`.
str::as_bytes(s) { |v| vec::position_from(v, start, end) { |x| x == b } }
}

// Function: rindex
Expand All @@ -880,40 +890,50 @@ fn rindex(ss: str, cc: char) -> option<uint> {

// found here?
if ch == cc {
ret option::some(cii);
ret some(cii);
}
}

// wasn't found
ret option::none;
ret none;
}

// Function: find_bytes
//
// Find the byte position of the first instance of `needle` within
// `haystack`, or return option::none if there is no such instance.
//
// Searches the whole of `haystack` by calling find_from_bytes with the byte
// range [0, len_bytes(haystack)).
fn find_bytes(haystack: str, needle: str) -> option<uint> {
find_from_bytes(haystack, needle, 0u, len_bytes(haystack))
}

//Function: find_from_bytes
//
// Find the byte position of the first instance of one string that lies
// entirely within the byte range [`start`, `end`) of another, or return
// option::none
//
// FIXME: Boyer-Moore should be significantly faster
fn find_bytes(haystack: str, needle: str) -> option<uint> {
let haystack_len = len_bytes(haystack);
let needle_len = len_bytes(needle);
fn find_from_bytes(haystack: str, needle: str, start: uint, end:uint)
-> option<uint> {
assert end <= len_bytes(haystack);

let needle_len = len_bytes(needle);

if needle_len == 0u { ret option::some(0u); }
if needle_len > haystack_len { ret option::none; }
if needle_len == 0u { ret some(start); }
if needle_len > end { ret none; }

fn match_at(haystack: str, needle: str, ii: uint) -> bool {
let jj = ii;
for c: u8 in needle { if haystack[jj] != c { ret false; } jj += 1u; }
ret true;
}

let ii = 0u;
while ii <= haystack_len - needle_len {
if match_at(haystack, needle, ii) { ret option::some(ii); }
let ii = start;
while ii <= end - needle_len {
if match_at(haystack, needle, ii) { ret some(ii); }
ii += 1u;
}

ret option::none;
ret none;
}

// Function: find
Expand All @@ -922,8 +942,8 @@ fn find_bytes(haystack: str, needle: str) -> option<uint> {
// within another, or return option::none
fn find(haystack: str, needle: str) -> option<uint> {
alt find_bytes(haystack, needle) {
option::none { ret option::none; }
option::some(nn) { ret option::some(b2c_pos(haystack, nn)); }
none { ret none; }
some(nn) { ret some(b2c_pos(haystack, nn)); }
}
}

Expand Down Expand Up @@ -1522,18 +1542,18 @@ mod tests {

#[test]
fn test_index() {
assert ( index("hello", 'h') == option::some(0u));
assert ( index("hello", 'e') == option::some(1u));
assert ( index("hello", 'o') == option::some(4u));
assert ( index("hello", 'z') == option::none);
assert ( index("hello", 'h') == some(0u));
assert ( index("hello", 'e') == some(1u));
assert ( index("hello", 'o') == some(4u));
assert ( index("hello", 'z') == none);
}

#[test]
fn test_rindex() {
assert (rindex("hello", 'l') == option::some(3u));
assert (rindex("hello", 'o') == option::some(4u));
assert (rindex("hello", 'h') == option::some(0u));
assert (rindex("hello", 'z') == option::none);
assert (rindex("hello", 'l') == some(3u));
assert (rindex("hello", 'o') == some(4u));
assert (rindex("hello", 'h') == some(0u));
assert (rindex("hello", 'z') == none);
}

#[test]
Expand Down Expand Up @@ -1737,29 +1757,57 @@ mod tests {
#[test]
fn test_find_bytes() {
// byte positions
assert (find_bytes("banana", "apple pie") == option::none);
assert (find_bytes("", "") == option::some(0u));
assert (find_bytes("banana", "apple pie") == none);
assert (find_bytes("", "") == some(0u));

let data = "ประเทศไทย中华Việt Nam";
assert (find_bytes(data, "") == option::some(0u));
assert (find_bytes(data, "ประเ") == option::some( 0u));
assert (find_bytes(data, "ะเ") == option::some( 6u));
assert (find_bytes(data, "中华") == option::some(27u));
assert (find_bytes(data, "ไท华") == option::none);
assert (find_bytes(data, "") == some(0u));
assert (find_bytes(data, "ประเ") == some( 0u));
assert (find_bytes(data, "ะเ") == some( 6u));
assert (find_bytes(data, "中华") == some(27u));
assert (find_bytes(data, "ไท华") == none);
}

#[test]
fn test_find_from_bytes() {
// All expected results are byte positions, not char positions.

// An empty needle is reported at `start`, even in an empty haystack.
assert (find_from_bytes("", "", 0u, 0u) == some(0u));

let data = "abcabc";
assert find_from_bytes(data, "ab", 0u, 6u) == some(0u);
// Starting at byte 2 skips the first "ab", so the one at byte 3 is found.
assert find_from_bytes(data, "ab", 2u, 6u) == some(3u);
// The second "ab" occupies bytes 3..5, which does not fit inside [2, 4).
assert find_from_bytes(data, "ab", 2u, 4u) == none;

// One copy of this string is 43 bytes of UTF-8 (27 for the Thai text,
// 6 for the Chinese text, 10 for "Việt Nam"); appending it to itself
// gives 86 bytes, with the second copy starting at byte 43.
let data = "ประเทศไทย中华Việt Nam";
data += data;
// An empty needle is reported at whatever `start` is.
assert find_from_bytes(data, "", 0u, 43u) == some(0u);
assert find_from_bytes(data, "", 6u, 43u) == some(6u);

// Searches restricted to the first copy, bytes [0, 43).
assert find_from_bytes(data, "ประ", 0u, 43u) == some( 0u);
assert find_from_bytes(data, "ทศไ", 0u, 43u) == some(12u);
assert find_from_bytes(data, "ย中", 0u, 43u) == some(24u);
assert find_from_bytes(data, "iệt", 0u, 43u) == some(34u);
assert find_from_bytes(data, "Nam", 0u, 43u) == some(40u);

// Searches restricted to the second copy, bytes [43, 86): every expected
// position is the first-copy position plus 43.
assert find_from_bytes(data, "ประ", 43u, 86u) == some(43u);
assert find_from_bytes(data, "ทศไ", 43u, 86u) == some(55u);
assert find_from_bytes(data, "ย中", 43u, 86u) == some(67u);
assert find_from_bytes(data, "iệt", 43u, 86u) == some(77u);
assert find_from_bytes(data, "Nam", 43u, 86u) == some(83u);
}

#[test]
fn test_find() {
// char positions
assert (find("banana", "apple pie") == option::none);
assert (find("", "") == option::some(0u));
assert (find("banana", "apple pie") == none);
assert (find("", "") == some(0u));

let data = "ประเทศไทย中华Việt Nam";
assert (find(data, "") == option::some(0u));
assert (find(data, "ประเ") == option::some(0u));
assert (find(data, "ะเ") == option::some(2u));
assert (find(data, "中华") == option::some(9u));
assert (find(data, "ไท华") == option::none);
assert (find(data, "") == some(0u));
assert (find(data, "ประเ") == some(0u));
assert (find(data, "ะเ") == some(2u));
assert (find(data, "中华") == some(9u));
assert (find(data, "ไท华") == none);
}

#[test]
Expand Down