diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index 7e153839448cb..744ea97441d0f 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -157,7 +157,8 @@ fn span_to_lines(sp: span, cm: codemap::codemap) -> @file_lines { fn get_line(fm: filemap, line: int) -> str unsafe { let begin: uint = fm.lines[line].byte - fm.start_pos.byte; - let end = alt str::byte_index(*fm.src, '\n' as u8, begin) { + let end = alt str::byte_index_from(*fm.src, '\n' as u8, begin, + str::len(*fm.src)) { some(e) { e } none { str::len(*fm.src) } }; diff --git a/src/libcore/str.rs b/src/libcore/str.rs index f79594a3023e0..e1c99a48fa103 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -9,6 +9,8 @@ for correctness, but some UTF-8 unsafe functions are also provided. For some heavy-duty uses, we recommend trying std::rope. */ +import option::{some, none}; + export // Creating a string from_bytes, @@ -69,9 +71,11 @@ export // Searching index, byte_index, + byte_index_from, rindex, find, find_bytes, + find_from_bytes, contains, starts_with, ends_with, @@ -665,8 +669,8 @@ fn replace(s: str, from: str, to: str) : is_not_empty(from) -> str unsafe { } else { let idx; alt find_bytes(s, from) { - option::some(x) { idx = x; } - option::none { ret s; } + some(x) { idx = x; } + none { ret s; } } let before = unsafe::slice_bytes(s, 0u, idx as uint); let after = unsafe::slice_bytes(s, idx as uint + len_bytes(from), @@ -842,7 +846,7 @@ fn index(ss: str, cc: char) -> option { // found here? if ch == cc { - ret option::some(cii); + ret some(cii); } cii += 1u; @@ -850,20 +854,26 @@ fn index(ss: str, cc: char) -> option { } // wasn't found - ret option::none; + ret none; } // Function: byte_index // // Returns the index of the first matching byte // (as option some/none) -fn byte_index(s: str, b: u8, start: uint) -> option { - let i = start, l = len_bytes(s); - while i < l { - if s[i] == b { ret some(i); } - i += 1u; - } - ret none; +fn byte_index(s: str, b: u8) -> option { + byte_index_from(s, b, 0u, len_bytes(s)) +} + +// Function: byte_index_from +// +// Returns the index of the first matching byte within the range [`start`, +// `end`). +// (as option some/none) +fn byte_index_from(s: str, b: u8, start: uint, end: uint) -> option { + assert end <= len_bytes(s); + + str::as_bytes(s) { |v| vec::position_from(v, start, end) { |x| x == b } } } // Function: rindex @@ -880,26 +890,36 @@ fn rindex(ss: str, cc: char) -> option { // found here? if ch == cc { - ret option::some(cii); + ret some(cii); } } // wasn't found - ret option::none; + ret none; } //Function: find_bytes // // Find the char position of the first instance of one string // within another, or return option::none +fn find_bytes(haystack: str, needle: str) -> option { + find_from_bytes(haystack, needle, 0u, len_bytes(haystack)) +} + +//Function: find_from_bytes +// +// Find the char position of the first instance of one string +// within another, or return option::none // // FIXME: Boyer-Moore should be significantly faster -fn find_bytes(haystack: str, needle: str) -> option { - let haystack_len = len_bytes(haystack); - let needle_len = len_bytes(needle); +fn find_from_bytes(haystack: str, needle: str, start: uint, end:uint) + -> option { + assert end <= len_bytes(haystack); + + let needle_len = len_bytes(needle); - if needle_len == 0u { ret option::some(0u); } - if needle_len > haystack_len { ret option::none; } + if needle_len == 0u { ret some(start); } + if needle_len > end { ret none; } fn match_at(haystack: str, needle: str, ii: uint) -> bool { let jj = ii; @@ -907,13 +927,13 @@ fn find_bytes(haystack: str, needle: str) -> option { ret true; } - let ii = 0u; - while ii <= haystack_len - needle_len { - if match_at(haystack, needle, ii) { ret option::some(ii); } + let ii = start; + while ii <= end - needle_len { + if match_at(haystack, needle, ii) { ret some(ii); } ii += 1u; } - ret option::none; + ret none; } // Function: find @@ -922,8 +942,8 @@ fn find_bytes(haystack: str, needle: str) -> option { // within another, or return option::none fn find(haystack: str, needle: str) -> option { alt find_bytes(haystack, needle) { - option::none { ret option::none; } - option::some(nn) { ret option::some(b2c_pos(haystack, nn)); } + none { ret none; } + some(nn) { ret some(b2c_pos(haystack, nn)); } } } @@ -1522,18 +1542,18 @@ mod tests { #[test] fn test_index() { - assert ( index("hello", 'h') == option::some(0u)); - assert ( index("hello", 'e') == option::some(1u)); - assert ( index("hello", 'o') == option::some(4u)); - assert ( index("hello", 'z') == option::none); + assert ( index("hello", 'h') == some(0u)); + assert ( index("hello", 'e') == some(1u)); + assert ( index("hello", 'o') == some(4u)); + assert ( index("hello", 'z') == none); } #[test] fn test_rindex() { - assert (rindex("hello", 'l') == option::some(3u)); - assert (rindex("hello", 'o') == option::some(4u)); - assert (rindex("hello", 'h') == option::some(0u)); - assert (rindex("hello", 'z') == option::none); + assert (rindex("hello", 'l') == some(3u)); + assert (rindex("hello", 'o') == some(4u)); + assert (rindex("hello", 'h') == some(0u)); + assert (rindex("hello", 'z') == none); } #[test] @@ -1737,29 +1757,57 @@ mod tests { #[test] fn test_find_bytes() { // byte positions - assert (find_bytes("banana", "apple pie") == option::none); - assert (find_bytes("", "") == option::some(0u)); + assert (find_bytes("banana", "apple pie") == none); + assert (find_bytes("", "") == some(0u)); let data = "ประเทศไทย中华Việt Nam"; - assert (find_bytes(data, "") == option::some(0u)); - assert (find_bytes(data, "ประเ") == option::some( 0u)); - assert (find_bytes(data, "ะเ") == option::some( 6u)); - assert (find_bytes(data, "中华") == option::some(27u)); - assert (find_bytes(data, "ไท华") == option::none); + assert (find_bytes(data, "") == some(0u)); + assert (find_bytes(data, "ประเ") == some( 0u)); + assert (find_bytes(data, "ะเ") == some( 6u)); + assert (find_bytes(data, "中华") == some(27u)); + assert (find_bytes(data, "ไท华") == none); + } + + #[test] + fn test_find_from_bytes() { + // byte positions + assert (find_from_bytes("", "", 0u, 0u) == some(0u)); + + let data = "abcabc"; + assert find_from_bytes(data, "ab", 0u, 6u) == some(0u); + assert find_from_bytes(data, "ab", 2u, 6u) == some(3u); + assert find_from_bytes(data, "ab", 2u, 4u) == none; + + let data = "ประเทศไทย中华Việt Nam"; + data += data; + assert find_from_bytes(data, "", 0u, 43u) == some(0u); + assert find_from_bytes(data, "", 6u, 43u) == some(6u); + + assert find_from_bytes(data, "ประ", 0u, 43u) == some( 0u); + assert find_from_bytes(data, "ทศไ", 0u, 43u) == some(12u); + assert find_from_bytes(data, "ย中", 0u, 43u) == some(24u); + assert find_from_bytes(data, "iệt", 0u, 43u) == some(34u); + assert find_from_bytes(data, "Nam", 0u, 43u) == some(40u); + + assert find_from_bytes(data, "ประ", 43u, 86u) == some(43u); + assert find_from_bytes(data, "ทศไ", 43u, 86u) == some(55u); + assert find_from_bytes(data, "ย中", 43u, 86u) == some(67u); + assert find_from_bytes(data, "iệt", 43u, 86u) == some(77u); + assert find_from_bytes(data, "Nam", 43u, 86u) == some(83u); } #[test] fn test_find() { // char positions - assert (find("banana", "apple pie") == option::none); - assert (find("", "") == option::some(0u)); + assert (find("banana", "apple pie") == none); + assert (find("", "") == some(0u)); let data = "ประเทศไทย中华Việt Nam"; - assert (find(data, "") == option::some(0u)); - assert (find(data, "ประเ") == option::some(0u)); - assert (find(data, "ะเ") == option::some(2u)); - assert (find(data, "中华") == option::some(9u)); - assert (find(data, "ไท华") == option::none); + assert (find(data, "") == some(0u)); + assert (find(data, "ประเ") == some(0u)); + assert (find(data, "ะเ") == some(2u)); + assert (find(data, "中华") == some(9u)); + assert (find(data, "ไท华") == none); } #[test]