diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs index 5b7d7a35c5371..23ddb56c8f068 100644 --- a/src/comp/back/link.rs +++ b/src/comp/back/link.rs @@ -395,7 +395,7 @@ fn build_link_meta(sess: session, c: ast::crate, output: str, metas: provided_metas, dep_hashes: [str]) -> str { fn len_and_str(s: str) -> str { - ret #fmt["%u_%s", str::len_bytes(s), s]; + ret #fmt["%u_%s", str::len(s), s]; } fn len_and_str_lit(l: ast::lit) -> str { @@ -478,7 +478,7 @@ fn build_link_meta(sess: session, c: ast::crate, output: str, } fn truncated_sha1_result(sha: sha1) -> str unsafe { - ret str::unsafe::slice_bytes(sha.result_str(), 0u, 16u); + ret str::slice(sha.result_str(), 0u, 16u); } @@ -520,7 +520,7 @@ fn mangle(ss: path) -> str { for s in ss { alt s { path_name(s) | path_mod(s) { - n += #fmt["%u%s", str::len_bytes(s), s]; + n += #fmt["%u%s", str::len(s), s]; } } } n += "E"; // End name-sequence. @@ -567,13 +567,12 @@ fn link_binary(sess: session, // Converts a library file name into a cc -l argument fn unlib(config: @session::config, filename: str) -> str unsafe { let rmlib = fn@(filename: str) -> str { - let found = str::find_bytes(filename, "lib"); + let found = str::find(filename, "lib"); if config.os == session::os_macos || (config.os == session::os_linux || config.os == session::os_freebsd) && option::is_some(found) && option::get(found) == 0u { - ret str::unsafe::slice_bytes(filename, 3u, - str::len_bytes(filename)); + ret str::slice(filename, 3u, str::len(filename)); } else { ret filename; } }; fn rmext(filename: str) -> str { diff --git a/src/comp/driver/rustc.rs b/src/comp/driver/rustc.rs index 5186290e4a4cb..8d0b854906230 100644 --- a/src/comp/driver/rustc.rs +++ b/src/comp/driver/rustc.rs @@ -13,7 +13,7 @@ import rustc::driver::diagnostic; fn version(argv0: str) { let vers = "unknown version"; let env_vers = #env["CFG_VERSION"]; - if str::len_bytes(env_vers) != 0u { vers = env_vers; } + if str::len(env_vers) != 0u { vers = env_vers; } io::stdout().write_str(#fmt["%s %s\n", argv0, vers]); io::stdout().write_str(#fmt["host: %s\n", host_triple()]); } diff --git a/src/comp/middle/trans/common.rs b/src/comp/middle/trans/common.rs index 24af4e33b9abf..1c04bc8c4c69a 100644 --- a/src/comp/middle/trans/common.rs +++ b/src/comp/middle/trans/common.rs @@ -777,7 +777,7 @@ fn C_u8(i: uint) -> ValueRef { ret C_integral(T_i8(), i as u64, False); } // our boxed-and-length-annotated strings. fn C_cstr(cx: crate_ctxt, s: str) -> ValueRef { let sc = str::as_buf(s) {|buf| - llvm::LLVMConstString(buf, str::len_bytes(s) as unsigned, False) + llvm::LLVMConstString(buf, str::len(s) as unsigned, False) }; let g = str::as_buf(cx.names("str"), @@ -791,7 +791,7 @@ fn C_cstr(cx: crate_ctxt, s: str) -> ValueRef { // Returns a Plain Old LLVM String: fn C_postr(s: str) -> ValueRef { ret str::as_buf(s) {|buf| - llvm::LLVMConstString(buf, str::len_bytes(s) as unsigned, False) + llvm::LLVMConstString(buf, str::len(s) as unsigned, False) }; } diff --git a/src/comp/middle/trans/debuginfo.rs b/src/comp/middle/trans/debuginfo.rs index 39da8cf2c57d8..47024432b4e12 100644 --- a/src/comp/middle/trans/debuginfo.rs +++ b/src/comp/middle/trans/debuginfo.rs @@ -48,7 +48,7 @@ const DW_ATE_unsigned_char: int = 0x08; fn llstr(s: str) -> ValueRef { str::as_buf(s, {|sbuf| - llvm::LLVMMDString(sbuf, str::len_bytes(s) as ctypes::c_uint) + llvm::LLVMMDString(sbuf, str::len(s) as ctypes::c_uint) }) } fn lltag(lltag: int) -> ValueRef { @@ -167,8 +167,7 @@ fn create_compile_unit(cx: crate_ctxt, full_path: str) let work_dir = cx.sess.working_dir; let file_path = if str::starts_with(full_path, work_dir) { - str::unsafe::slice_bytes(full_path, str::len_bytes(work_dir), - str::len_bytes(full_path)) + str::slice(full_path, str::len(work_dir), str::len(full_path)) } else { full_path }; diff --git a/src/comp/middle/trans/tvec.rs b/src/comp/middle/trans/tvec.rs index ab9412263d2cf..aeaf65db7f4c0 100644 --- a/src/comp/middle/trans/tvec.rs +++ b/src/comp/middle/trans/tvec.rs @@ -126,7 +126,7 @@ fn trans_vec(bcx: block, args: [@ast::expr], id: ast::node_id, } fn trans_str(bcx: block, s: str, dest: dest) -> block { - let veclen = str::len_bytes(s) + 1u; // +1 for \0 + let veclen = str::len(s) + 1u; // +1 for \0 let {bcx: bcx, val: sptr, _} = alloc(bcx, ty::mk_str(bcx.tcx()), veclen); diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index 744ea97441d0f..0d1172b3502cd 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -157,12 +157,11 @@ fn span_to_lines(sp: span, cm: codemap::codemap) -> @file_lines { fn get_line(fm: filemap, line: int) -> str unsafe { let begin: uint = fm.lines[line].byte - fm.start_pos.byte; - let end = alt str::byte_index_from(*fm.src, '\n' as u8, begin, - str::len(*fm.src)) { + let end = alt str::index_from(*fm.src, '\n', begin, str::len(*fm.src)) { some(e) { e } none { str::len(*fm.src) } }; - str::unsafe::slice_bytes(*fm.src, begin, end) + str::slice(*fm.src, begin, end) } fn lookup_byte_offset(cm: codemap::codemap, chpos: uint) @@ -172,7 +171,7 @@ fn lookup_byte_offset(cm: codemap::codemap, chpos: uint) let {fm,line} = lookup_line(cm,chpos,lookup); let line_offset = fm.lines[line].byte - fm.start_pos.byte; let col = chpos - fm.lines[line].ch; - let col_offset = str::substr_len_bytes(*fm.src, line_offset, col); + let col_offset = str::substr_len(*fm.src, line_offset, col); ret {fm: fm, pos: line_offset + col_offset}; } @@ -180,13 +179,13 @@ fn span_to_snippet(sp: span, cm: codemap::codemap) -> str { let begin = lookup_byte_offset(cm,sp.lo); let end = lookup_byte_offset(cm,sp.hi); assert begin.fm == end.fm; - ret str::slice(*begin.fm.src, begin.pos, end.pos); + ret str::slice_chars(*begin.fm.src, begin.pos, end.pos); } fn get_snippet(cm: codemap::codemap, fidx: uint, lo: uint, hi: uint) -> str { let fm = cm.files[fidx]; - ret str::slice(*fm.src, lo, hi) + ret str::slice_chars(*fm.src, lo, hi) } fn get_filemap(cm: codemap, filename: str) -> filemap { diff --git a/src/comp/syntax/ext/qquote.rs b/src/comp/syntax/ext/qquote.rs index 3476584aecece..e53307b474abf 100644 --- a/src/comp/syntax/ext/qquote.rs +++ b/src/comp/syntax/ext/qquote.rs @@ -214,7 +214,7 @@ fn finish if (j < g_len && i == cx.gather[j].lo) { assert ch == '$'; let repl = #fmt("$%u ", j); - state = skip(str::len(repl)); + state = skip(str::len_chars(repl)); str2 += repl; } alt state { diff --git a/src/comp/syntax/parse/lexer.rs b/src/comp/syntax/parse/lexer.rs index 113ca056ed40b..86d925e87e65f 100644 --- a/src/comp/syntax/parse/lexer.rs +++ b/src/comp/syntax/parse/lexer.rs @@ -24,7 +24,7 @@ impl reader for reader { fn get_str_from(start: uint) -> str unsafe { // I'm pretty skeptical about this subtraction. What if there's a // multi-byte character before the mark? - ret str::unsafe::slice_bytes(*self.src, start - 1u, self.pos - 1u); + ret str::slice(*self.src, start - 1u, self.pos - 1u); } fn next() -> char { if self.pos < self.len { @@ -64,7 +64,7 @@ fn new_reader(cm: codemap::codemap, itr: @interner::interner) -> reader { let r = @{cm: cm, span_diagnostic: span_diagnostic, - src: filemap.src, len: str::len_bytes(*filemap.src), + src: filemap.src, len: str::len(*filemap.src), mutable col: 0u, mutable pos: 0u, mutable curr: -1 as char, mutable chpos: filemap.start_pos.ch, mutable strs: [], filemap: filemap, interner: itr}; @@ -163,7 +163,7 @@ fn scan_exponent(rdr: reader) -> option { rdr.bump(); } let exponent = scan_digits(rdr, 10u); - if str::len_bytes(exponent) > 0u { + if str::len(exponent) > 0u { ret some(rslt + exponent); } else { rdr.fatal("scan_exponent: bad fp literal"); } } else { ret none::; } @@ -226,7 +226,7 @@ fn scan_number(c: char, rdr: reader) -> token::token { tp = if signed { either::left(ast::ty_i64) } else { either::right(ast::ty_u64) }; } - if str::len_bytes(num_str) == 0u { + if str::len(num_str) == 0u { rdr.fatal("no valid digits found for number"); } let parsed = option::get(u64::from_str(num_str, base as u64)); @@ -273,7 +273,7 @@ fn scan_number(c: char, rdr: reader) -> token::token { ret token::LIT_FLOAT(interner::intern(*rdr.interner, num_str), ast::ty_f); } else { - if str::len_bytes(num_str) == 0u { + if str::len(num_str) == 0u { rdr.fatal("no valid digits found for number"); } let parsed = option::get(u64::from_str(num_str, base as u64)); @@ -610,8 +610,8 @@ fn trim_whitespace_prefix_and_push_line(&lines: [str], s: str, col: uint) unsafe { let s1; if all_whitespace(s, 0u, col) { - if col < str::len_bytes(s) { - s1 = str::unsafe::slice_bytes(s, col, str::len_bytes(s)); + if col < str::len(s) { + s1 = str::slice(s, col, str::len(s)); } else { s1 = ""; } } else { s1 = s; } log(debug, "pushing line: " + s1); @@ -651,7 +651,7 @@ fn read_block_comment(rdr: reader, code_to_the_left: bool) -> cmnt { } } } - if str::len_bytes(curr_line) != 0u { + if str::len(curr_line) != 0u { trim_whitespace_prefix_and_push_line(lines, curr_line, col); } let style = if code_to_the_left { trailing } else { isolated }; diff --git a/src/comp/syntax/print/pprust.rs b/src/comp/syntax/print/pprust.rs index 2b29bf57eaa10..76ab2b4ecb542 100644 --- a/src/comp/syntax/print/pprust.rs +++ b/src/comp/syntax/print/pprust.rs @@ -1465,7 +1465,7 @@ fn print_ty_fn(s: ps, opt_proto: option, popen(s); fn print_arg(s: ps, input: ast::arg) { print_arg_mode(s, input.mode); - if str::len_bytes(input.ident) > 0u { + if str::len(input.ident) > 0u { word_space(s, input.ident + ":"); } print_type(s, input.ty); @@ -1647,7 +1647,7 @@ fn print_string(s: ps, st: str) { fn escape_str(st: str, to_escape: char) -> str { let out: str = ""; - let len = str::len_bytes(st); + let len = str::len(st); let i = 0u; while i < len { alt st[i] as char { diff --git a/src/comp/util/ppaux.rs b/src/comp/util/ppaux.rs index 547c2d5ee824c..0ed630629b719 100644 --- a/src/comp/util/ppaux.rs +++ b/src/comp/util/ppaux.rs @@ -129,9 +129,9 @@ fn ty_to_str(cx: ctxt, typ: t) -> str { } } -fn ty_to_short_str(cx: ctxt, typ: t) -> str unsafe { +fn ty_to_short_str(cx: ctxt, typ: t) -> str { let s = encoder::encoded_ty(cx, typ); - if str::len_bytes(s) >= 32u { s = str::unsafe::slice_bytes(s, 0u, 32u); } + if str::len(s) >= 32u { s = str::slice(s, 0u, 32u); } ret s; } diff --git a/src/compiletest/errors.rs b/src/compiletest/errors.rs index f6b32e2fd311b..90b0c7d34e5f4 100644 --- a/src/compiletest/errors.rs +++ b/src/compiletest/errors.rs @@ -25,15 +25,15 @@ fn load_errors(testfile: str) -> [expected_error] { fn parse_expected(line_num: uint, line: str) -> [expected_error] unsafe { let error_tag = "//!"; let idx; - alt str::find_bytes(line, error_tag) { + alt str::find(line, error_tag) { option::none { ret []; } - option::some(nn) { idx = (nn as uint) + str::len_bytes(error_tag); } + option::some(nn) { idx = (nn as uint) + str::len(error_tag); } } // "//!^^^ kind msg" denotes a message expected // three lines above current line: let adjust_line = 0u; - let len = str::len_bytes(line); + let len = str::len(line); while idx < len && line[idx] == ('^' as u8) { adjust_line += 1u; idx += 1u; @@ -43,11 +43,11 @@ fn parse_expected(line_num: uint, line: str) -> [expected_error] unsafe { while idx < len && line[idx] == (' ' as u8) { idx += 1u; } let start_kind = idx; while idx < len && line[idx] != (' ' as u8) { idx += 1u; } - let kind = str::to_lower(str::unsafe::slice_bytes(line, start_kind, idx)); + let kind = str::to_lower(str::slice(line, start_kind, idx)); // Extract msg: while idx < len && line[idx] == (' ' as u8) { idx += 1u; } - let msg = str::unsafe::slice_bytes(line, idx, len); + let msg = str::slice(line, idx, len); #debug("line=%u kind=%s msg=%s", line_num - adjust_line, kind, msg); diff --git a/src/compiletest/header.rs b/src/compiletest/header.rs index a976c5fb98c7c..099598d7fdb52 100644 --- a/src/compiletest/header.rs +++ b/src/compiletest/header.rs @@ -106,12 +106,10 @@ fn parse_name_directive(line: str, directive: str) -> bool { fn parse_name_value_directive(line: str, directive: str) -> option unsafe { let keycolon = directive + ":"; - alt str::find_bytes(line, keycolon) { + alt str::find(line, keycolon) { option::some(colon) { - let value = - str::unsafe::slice_bytes(line, - colon + str::len_bytes(keycolon), - str::len_bytes(line)); + let value = str::slice(line, colon + str::len(keycolon), + str::len(line)); #debug("%s: %s", directive, value); option::some(value) } diff --git a/src/fuzzer/fuzzer.rs b/src/fuzzer/fuzzer.rs index b0a47e4051b3d..4b614d1f08b6a 100644 --- a/src/fuzzer/fuzzer.rs +++ b/src/fuzzer/fuzzer.rs @@ -335,7 +335,7 @@ fn removeDirIfExists(filename: str) { fn check_running(exe_filename: str) -> happiness { let p = std::run::program_output("/Users/jruderman/scripts/timed_run_rust_program.py", [exe_filename]); let comb = p.out + "\n" + p.err; - if str::len_bytes(comb) > 1u { + if str::len(comb) > 1u { log(error, "comb comb comb: " + comb); } diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs index 30676e34de831..b5e2eade244cb 100644 --- a/src/libcore/extfmt.rs +++ b/src/libcore/extfmt.rs @@ -82,10 +82,10 @@ mod ct { fn parse_fmt_string(s: str, error: error_fn) -> [piece] unsafe { let pieces: [piece] = []; - let lim = str::len_bytes(s); + let lim = str::len(s); let buf = ""; fn flush_buf(buf: str, &pieces: [piece]) -> str { - if str::len_bytes(buf) > 0u { + if str::len(buf) > 0u { let piece = piece_string(buf); pieces += [piece]; } @@ -93,13 +93,13 @@ mod ct { } let i = 0u; while i < lim { - let curr = str::unsafe::slice_bytes(s, i, i+1u); + let curr = str::slice(s, i, i+1u); if str::eq(curr, "%") { i += 1u; if i >= lim { error("unterminated conversion at end of string"); } - let curr2 = str::unsafe::slice_bytes(s, i, i+1u); + let curr2 = str::slice(s, i, i+1u); if str::eq(curr2, "%") { buf += curr2; i += 1u; @@ -225,7 +225,7 @@ mod ct { fn parse_type(s: str, i: uint, lim: uint, error: error_fn) -> {ty: ty, next: uint} unsafe { if i >= lim { error("missing type in conversion"); } - let tstr = str::unsafe::slice_bytes(s, i, i+1u); + let tstr = str::slice(s, i, i+1u); // TODO: Do we really want two signed types here? // How important is it to be printf compatible? let t = @@ -325,7 +325,7 @@ mod rt { alt cv.precision { count_implied { s } count_is(max) { - if max as uint < str::len(s) { + if max as uint < str::len_chars(s) { str::substr(s, 0u, max as uint) } else { s } } @@ -368,7 +368,7 @@ mod rt { "" } else { let s = uint::to_str(num, radix); - let len = str::len(s); + let len = str::len_chars(s); if len < prec { let diff = prec - len; let pad = str_init_elt(diff, '0'); @@ -400,7 +400,7 @@ mod rt { uwidth = width as uint; } } - let strlen = str::len(s); + let strlen = str::len_chars(s); if uwidth <= strlen { ret s; } let padchar = ' '; let diff = uwidth - strlen; @@ -433,13 +433,13 @@ mod rt { // zeros. It may make sense to convert zero padding to a precision // instead. - if signed && zero_padding && str::len_bytes(s) > 0u { + if signed && zero_padding && str::len(s) > 0u { let head = s[0]; if head == '+' as u8 || head == '-' as u8 || head == ' ' as u8 { let headstr = str::from_bytes([head]); // FIXME: not UTF-8 safe - let bytelen = str::len_bytes(s); - let numpart = str::unsafe::slice_bytes(s, 1u, bytelen); + let bytelen = str::len(s); + let numpart = str::slice(s, 1u, bytelen); ret headstr + padstr + numpart; } } diff --git a/src/libcore/float.rs b/src/libcore/float.rs index b21d154097faf..cea46dedf44d6 100644 --- a/src/libcore/float.rs +++ b/src/libcore/float.rs @@ -133,7 +133,7 @@ number represented by [num]. fn from_str(num: str) -> option { let pos = 0u; //Current byte position in the string. //Used to walk the string in O(n). - let len = str::len_bytes(num); //Length of the string, in bytes. + let len = str::len(num); //Length of the string, in bytes. if len == 0u { ret none; } let total = 0f; //Accumulated result diff --git a/src/libcore/str.rs b/src/libcore/str.rs index d81f2d45d4080..f0f274d75d520 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -4,9 +4,9 @@ Module: str String manipulation Strings are a packed UTF-8 representation of text, stored as null terminated -buffers of u8 bytes. Strings should be considered by character, -for correctness, but some UTF-8 unsafe functions are also provided. -For some heavy-duty uses, we recommend trying std::rope. +buffers of u8 bytes. Strings should be indexed in bytes, for efficiency, +but UTF-8 unsafe operations should be avoided. +For some heavy-duty uses, try std::rope. */ import option::{some, none}; @@ -37,6 +37,7 @@ export chars, substr, slice, + slice_chars, split, split_str, split_char, @@ -69,13 +70,14 @@ export lines_iter, // Searching + //index_chars, index, - byte_index, - byte_index_from, + index_from, rindex, + //rindex_chars, find, - find_bytes, - find_from_bytes, + find_from, + find_chars, contains, starts_with, ends_with, @@ -85,13 +87,13 @@ export is_empty, is_not_empty, is_whitespace, - len_bytes, - len_chars, len, + len, + len_chars, // Misc // FIXME: perhaps some more of this section shouldn't be exported? is_utf8, - substr_len_bytes, + substr_len, substr_len_chars, utf8_char_width, char_range_at, @@ -275,7 +277,7 @@ Failure: If the string does not contain any characters. */ fn pop_char(&s: str) -> char unsafe { - let end = len_bytes(s); + let end = len(s); let {ch:ch, prev:end} = char_range_at_reverse(s, end); s = unsafe::slice_bytes(s, 0u, end); ret ch; @@ -292,7 +294,7 @@ If the string does not contain any characters. */ fn shift_char(&s: str) -> char unsafe { let r = char_range_at(s, 0u); - s = unsafe::slice_bytes(s, r.next, len_bytes(s)); + s = unsafe::slice_bytes(s, r.next, len(s)); ret r.ch; } @@ -371,7 +373,7 @@ Convert a string to a vector of characters fn chars(s: str) -> [char] { let buf: [char] = []; let i = 0u; - let len = len_bytes(s); + let len = len(s); while i < len { let cur = char_range_at(s, i); buf += [cur.ch]; @@ -383,7 +385,7 @@ fn chars(s: str) -> [char] { /* Function: substr -Take a substring of another. Returns a string containing `len` chars +Take a substring of another. Returns a string containing `len` bytes starting at char offset `begin`. Failure: @@ -394,8 +396,33 @@ fn substr(s: str, begin: uint, len: uint) -> str { ret slice(s, begin, begin + len); } +// Function: slice +// +// Return a slice of the given string from the byte range [`begin`..`end`) +// or else fail when `begin` and `end` do not point to valid characters or +// beyond the last character of the string +fn slice(ss: str, begin: uint, end: uint) -> str { + alt maybe_slice(ss, begin, end) { + some(sli) { ret sli; } + none { fail "slice requires a valid start and end"; } + } +} + +// Function: maybe_slice +// +// Like slice, only returns an option +fn maybe_slice(ss: str, begin: uint, end: uint) -> option unsafe { + let sli = unsafe::slice_bytes(ss, begin, end); + + if is_utf8(bytes(sli)) { + ret some(sli); + } else { + ret none; + } +} + /* -Function: slice +Function: slice_chars Unicode-safe slice. Returns a slice of the given string containing the characters in the range [`begin`..`end`). `begin` and `end` are @@ -407,8 +434,9 @@ Failure: - If end is greater than the character length of the string FIXME: make faster by avoiding char conversion +FIXME: delete? */ -fn slice(s: str, begin: uint, end: uint) -> str { +fn slice_chars(s: str, begin: uint, end: uint) -> str { from_chars(vec::slice(chars(s), begin, end)) } @@ -447,7 +475,7 @@ fn splitn_byte(ss: str, sep: u8, count: uint) -> [str] unsafe { assert u8::is_ascii(sep); let vv = []; - let start = 0u, current = 0u, len = len_bytes(ss); + let start = 0u, current = 0u, len = len(ss); let splits_done = 0u; while splits_done < count && current < len { @@ -471,13 +499,13 @@ Splits a string into a vector of the substrings separated by a given string Note that this has recently been changed. For example: > assert ["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".") -FIXME: Boyer-Moore variation +FIXME: Boyer-Moore should be faster */ fn split_str(ss: str, sep: str) -> [str] unsafe { // unsafe is justified: we are splitting // UTF-8 with UTF-8, so the results will be OK - let sep_len = len_bytes(sep); + let sep_len = len(sep); assert sep_len > 0u; let vv = []; let start = 0u, start_match = 0u, current = 0u, matching = 0u; @@ -529,7 +557,7 @@ fn split(ss: str, sepfn: fn(cc: char)->bool) -> [str] { } }); - if len(accum) >= 0u || ends_with_sep { + if len_chars(accum) >= 0u || ends_with_sep { vv += [accum]; } @@ -554,7 +582,7 @@ up to `count` times fn splitn_char(ss: str, sep: char, count: uint) -> [str] unsafe { let vv = []; - let start = 0u, current = 0u, len = len_bytes(ss); + let start = 0u, current = 0u, len = len(ss); let splits_done = 0u; while splits_done < count && current < len { @@ -601,7 +629,7 @@ separated by whitespace */ fn words(ss: str) -> [str] { ret vec::filter( split(ss, {|cc| char::is_whitespace(cc)}), - {|w| 0u < str::len(w)}); + {|w| 0u < str::len_chars(w)}); } /* @@ -611,13 +639,13 @@ Create a vector of substrings of size `nn` */ fn windowed(nn: uint, ss: str) -> [str] { let ww = []; - let len = str::len(ss); + let len = str::len_chars(ss); assert 1u <= nn; let ii = 0u; while ii+nn <= len { - let w = slice( ss, ii, ii+nn ); + let w = slice_chars( ss, ii, ii+nn ); vec::push(ww,w); ii += 1u; } @@ -643,7 +671,7 @@ fn to_upper(s: str) -> str { map(s, char::to_upper) } -// FIXME: This is super-inefficient +// FIXME: This is super-inefficient: stop the extra slicing copies /* Function: replace @@ -661,21 +689,21 @@ The original string with all occurances of `from` replaced with `to` */ fn replace(s: str, from: str, to: str) -> str unsafe { assert is_not_empty(from); - if len_bytes(s) == 0u { + if len(s) == 0u { ret ""; } else if starts_with(s, from) { ret to + replace( - unsafe::slice_bytes(s, len_bytes(from), len_bytes(s)), + unsafe::slice_bytes(s, len(from), len(s)), from, to); } else { let idx; - alt find_bytes(s, from) { + alt find(s, from) { some(x) { idx = x; } none { ret s; } } let before = unsafe::slice_bytes(s, 0u, idx as uint); - let after = unsafe::slice_bytes(s, idx as uint + len_bytes(from), - len_bytes(s)); + let after = unsafe::slice_bytes(s, idx as uint + len(from), + len(s)); ret before + to + replace(after, from, to); } } @@ -734,7 +762,7 @@ Return true if a predicate matches all characters or if the string contains no characters */ fn all(s: str, it: fn(char) -> bool) -> bool{ - ret substr_all(s, 0u, len_bytes(s), it); + ret substr_all(s, 0u, len(s), it); } /* @@ -754,7 +782,7 @@ Apply a function to each character */ fn map(ss: str, ff: fn(char) -> char) -> str { let result = ""; - reserve(result, len_bytes(ss)); + reserve(result, len(ss)); chars_iter(ss, {|cc| str::push_char(result, ff(cc)); @@ -770,7 +798,7 @@ Iterate over the bytes in a string */ fn bytes_iter(ss: str, it: fn(u8)) { let pos = 0u; - let len = len_bytes(ss); + let len = len(ss); while (pos < len) { it(ss[pos]); @@ -784,7 +812,7 @@ Function: chars_iter Iterate over the characters in a string */ fn chars_iter(s: str, it: fn(char)) { - let pos = 0u, len = len_bytes(s); + let pos = 0u, len = len(s); while (pos < len) { let {ch, next} = char_range_at(s, pos); pos = next; @@ -836,12 +864,42 @@ Section: Searching // Function: index // -// Returns the index of the first matching char +// Returns the byte index of the first matching char // (as option some/none) fn index(ss: str, cc: char) -> option { + index_from(ss, cc, 0u, len(ss)) +} + +// Function: index_from +// +// Returns the byte index of the first matching char +// (as option some/none), starting at `nn` +fn index_from(ss: str, cc: char, start: uint, end: uint) -> option { + let bii = start; + while bii < end { + let {ch, next} = char_range_at(ss, bii); + + // found here? + if ch == cc { + ret some(bii); + } + + bii = next; + } + + // wasn't found + ret none; +} + +// Function: index_chars +// +// Returns the char index of the first matching char +// (as option some/none) +// FIXME: delete? +fn index_chars(ss: str, cc: char) -> option { let bii = 0u; let cii = 0u; - let len = len_bytes(ss); + let len = len(ss); while bii < len { let {ch, next} = char_range_at(ss, bii); @@ -858,32 +916,34 @@ fn index(ss: str, cc: char) -> option { ret none; } -// Function: byte_index +// Function: rindex // -// Returns the index of the first matching byte +// Returns the byte index of the first matching char // (as option some/none) -fn byte_index(s: str, b: u8) -> option { - byte_index_from(s, b, 0u, len_bytes(s)) -} +fn rindex(ss: str, cc: char) -> option { + let bii = len(ss); + while bii > 0u { + let {ch, prev} = char_range_at_reverse(ss, bii); + bii = prev; -// Function: byte_index_from -// -// Returns the index of the first matching byte within the range [`start`, -// `end`). -// (as option some/none) -fn byte_index_from(s: str, b: u8, start: uint, end: uint) -> option { - assert end <= len_bytes(s); + // found here? + if ch == cc { + ret some(bii); + } + } - str::as_bytes(s) { |v| vec::position_from(v, start, end) { |x| x == b } } + // wasn't found + ret none; } -// Function: rindex +// Function: rindex_chars // -// Returns the index of the first matching char +// Returns the char index of the first matching char // (as option some/none) -fn rindex(ss: str, cc: char) -> option { - let bii = len_bytes(ss); - let cii = len(ss); +// FIXME: delete? +fn rindex_chars(ss: str, cc: char) -> option { + let bii = len(ss); + let cii = len_chars(ss); while bii > 0u { let {ch, prev} = char_range_at_reverse(ss, bii); cii -= 1u; @@ -899,25 +959,25 @@ fn rindex(ss: str, cc: char) -> option { ret none; } -//Function: find_bytes +//Function: find // -// Find the char position of the first instance of one string +// Find the byte position of the first instance of one string // within another, or return option::none -fn find_bytes(haystack: str, needle: str) -> option { - find_from_bytes(haystack, needle, 0u, len_bytes(haystack)) +fn find(haystack: str, needle: str) -> option { + find_from(haystack, needle, 0u, len(haystack)) } -//Function: find_from_bytes +//Function: find_from // -// Find the char position of the first instance of one string +// Find the byte position of the first instance of one string // within another, or return option::none // // FIXME: Boyer-Moore should be significantly faster -fn find_from_bytes(haystack: str, needle: str, start: uint, end:uint) +fn find_from(haystack: str, needle: str, start: uint, end:uint) -> option { - assert end <= len_bytes(haystack); + assert end <= len(haystack); - let needle_len = len_bytes(needle); + let needle_len = len(needle); if needle_len == 0u { ret some(start); } if needle_len > end { ret none; } @@ -937,12 +997,13 @@ fn find_from_bytes(haystack: str, needle: str, start: uint, end:uint) ret none; } -// Function: find +// Function: find_chars // // Find the char position of the first instance of one string // within another, or return option::none -fn find(haystack: str, needle: str) -> option { - alt find_bytes(haystack, needle) { +// FIXME: delete? +fn find_chars(haystack: str, needle: str) -> option { + alt find(haystack, needle) { none { ret none; } some(nn) { ret some(b2c_pos(haystack, nn)); } } @@ -953,7 +1014,7 @@ fn find(haystack: str, needle: str) -> option { // Convert a byte position into a char position // within a given string fn b2c_pos(ss: str, bpos: uint) -> uint { - assert bpos == 0u || bpos < len_bytes(ss); + assert bpos == 0u || bpos < len(ss); let ii = 0u; let cpos = 0u; @@ -978,7 +1039,7 @@ haystack - The string to look in needle - The string to look for */ fn contains(haystack: str, needle: str) -> bool { - option::is_some(find_bytes(haystack, needle)) + option::is_some(find(haystack, needle)) } /* @@ -992,8 +1053,8 @@ haystack - The string to look in needle - The string to look for */ fn starts_with(haystack: str, needle: str) -> bool unsafe { - let haystack_len: uint = len_bytes(haystack); - let needle_len: uint = len_bytes(needle); + let haystack_len: uint = len(haystack); + let needle_len: uint = len(needle); if needle_len == 0u { ret true; } if needle_len > haystack_len { ret false; } ret eq(unsafe::slice_bytes(haystack, 0u, needle_len), needle); @@ -1030,7 +1091,7 @@ Function: is_ascii Determines if a string contains only ASCII characters */ fn is_ascii(s: str) -> bool { - let i: uint = len_bytes(s); + let i: uint = len(s); while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { ret false; } } ret true; } @@ -1059,10 +1120,11 @@ fn is_whitespace(s: str) -> bool { } -// Function: len_bytes +// Function: len // -// Returns the string length in bytes -pure fn len_bytes(s: str) -> uint unsafe { +// Returns the string length/size in bytes +// not counting the null terminator +pure fn len(s: str) -> uint unsafe { as_bytes(s) { |v| let vlen = vec::len(v); // There should always be a null terminator @@ -1071,16 +1133,11 @@ pure fn len_bytes(s: str) -> uint unsafe { } } -// Function: len -// -// String length or size in characters. -// (Synonym: len_chars) -fn len(s: str) -> uint { - substr_len_chars(s, 0u, len_bytes(s)) +// FIXME: delete? +fn len_chars(s: str) -> uint { + substr_len_chars(s, 0u, len(s)) } -fn len_chars(s: str) -> uint { len(s) } - /* Section: Misc */ @@ -1125,6 +1182,8 @@ Safety note: - This function does not check whether the substring is valid. - This function fails if `byte_offset` or `byte_len` do not represent valid positions inside `s` + +FIXME: delete? */ fn substr_len_chars(s: str, byte_start: uint, byte_len: uint) -> uint { let i = byte_start; @@ -1140,7 +1199,7 @@ fn substr_len_chars(s: str, byte_start: uint, byte_len: uint) -> uint { } /* -Function: substr_len_bytes +Function: substr_len As byte_len but for a substring @@ -1157,10 +1216,8 @@ Safety note: This function fails if `byte_offset` or `char_len` do not represent valid positions in `s` - -FIXME: rename to 'substr_len_bytes' */ -fn substr_len_bytes(s: str, byte_offset: uint, char_len: uint) -> uint { +fn substr_len(s: str, byte_offset: uint, char_len: uint) -> uint { let i = byte_offset; let chars = 0u; while chars < char_len { @@ -1201,7 +1258,7 @@ This function can be used to iterate over the unicode characters of a string. Example: > let s = "中华Việt Nam"; > let i = 0u; -> while i < str::len_bytes(s) { +> while i < str::len(s) { > let {ch, next} = str::char_range_at(s, i); > std::io::println(#fmt("%u: %c",i,ch)); > i = next; @@ -1401,12 +1458,13 @@ fn reserve(&ss: str, nn: uint) { // These functions may create invalid UTF-8 strings and eat your baby. mod unsafe { export + // FIXME: stop exporting several of these from_bytes, from_byte, slice_bytes, slice_bytes_safe_range, push_byte, - push_bytes, // note: wasn't exported + push_bytes, pop_byte, shift_byte; @@ -1439,9 +1497,8 @@ mod unsafe { - If end is greater than the length of the string. */ unsafe fn slice_bytes(s: str, begin: uint, end: uint) -> str unsafe { - // FIXME: Typestate precondition assert (begin <= end); - assert (end <= len_bytes(s)); + assert (end <= len(s)); let v = as_bytes(s) { |v| vec::slice(v, begin, end) }; v += [0u8]; @@ -1458,7 +1515,7 @@ mod unsafe { unsafe fn slice_bytes_safe_range(s: str, begin: uint, end: uint) : uint::le(begin, end) -> str { // would need some magic to make this a precondition - assert (end <= len_bytes(s)); + assert (end <= len(s)); ret slice_bytes(s, begin, end); } @@ -1480,7 +1537,7 @@ mod unsafe { // // Removes the last byte from a string and returns it. (Not UTF-8 safe). unsafe fn pop_byte(&s: str) -> u8 unsafe { - let len = len_bytes(s); + let len = len(s); assert (len > 0u); let b = s[len - 1u]; s = unsafe::slice_bytes(s, 0u, len - 1u); @@ -1491,7 +1548,7 @@ mod unsafe { // // Removes the first byte from a string and returns it. (Not UTF-8 safe). unsafe fn shift_byte(&s: str) -> u8 unsafe { - let len = len_bytes(s); + let len = len(s); assert (len > 0u); let b = s[0]; s = unsafe::slice_bytes(s, 1u, len); @@ -1521,38 +1578,47 @@ mod tests { #[test] fn test_len() { - assert (len_bytes("") == 0u); - assert (len_bytes("hello world") == 11u); - assert (len_bytes("\x63") == 1u); - assert (len_bytes("\xa2") == 2u); - assert (len_bytes("\u03c0") == 2u); - assert (len_bytes("\u2620") == 3u); - assert (len_bytes("\U0001d11e") == 4u); - assert (len("") == 0u); assert (len("hello world") == 11u); assert (len("\x63") == 1u); - assert (len("\xa2") == 1u); - assert (len("\u03c0") == 1u); - assert (len("\u2620") == 1u); - assert (len("\U0001d11e") == 1u); - assert (len("ประเทศไทย中华Việt Nam") == 19u); + assert (len("\xa2") == 2u); + assert (len("\u03c0") == 2u); + assert (len("\u2620") == 3u); + assert (len("\U0001d11e") == 4u); + + assert (len_chars("") == 0u); + assert (len_chars("hello world") == 11u); + assert (len_chars("\x63") == 1u); + assert (len_chars("\xa2") == 1u); + assert (len_chars("\u03c0") == 1u); + assert (len_chars("\u2620") == 1u); + assert (len_chars("\U0001d11e") == 1u); + assert (len_chars("ประเทศไทย中华Việt Nam") == 19u); } #[test] - fn test_index() { - assert ( index("hello", 'h') == some(0u)); - assert ( index("hello", 'e') == some(1u)); - assert ( index("hello", 'o') == some(4u)); - assert ( index("hello", 'z') == none); + fn test_index_chars() { + assert ( index_chars("hello", 'h') == some(0u)); + assert ( index_chars("hello", 'e') == some(1u)); + assert ( index_chars("hello", 'o') == some(4u)); + assert ( index_chars("hello", 'z') == none); } #[test] fn test_rindex() { - assert (rindex("hello", 'l') == some(3u)); - assert (rindex("hello", 'o') == some(4u)); - assert (rindex("hello", 'h') == some(0u)); - assert (rindex("hello", 'z') == none); + assert rindex("hello", 'l') == some(3u); + assert rindex("hello", 'o') == some(4u); + assert rindex("hello", 'h') == some(0u); + assert rindex("hello", 'z') == none; + assert rindex("ประเทศไทย中华Việt Nam", '华') == some(30u); + } + + #[test] + fn test_rindex_chars() { + assert (rindex_chars("hello", 'l') == some(3u)); + assert (rindex_chars("hello", 'o') == some(4u)); + assert (rindex_chars("hello", 'h') == some(0u)); + assert (rindex_chars("hello", 'z') == none); } #[test] @@ -1755,59 +1821,59 @@ mod tests { } #[test] - fn test_find_bytes() { + fn test_find() { // byte positions - assert (find_bytes("banana", "apple pie") == none); - assert (find_bytes("", "") == some(0u)); + assert (find("banana", "apple pie") == none); + assert (find("", "") == some(0u)); let data = "ประเทศไทย中华Việt Nam"; - assert (find_bytes(data, "") == some(0u)); - assert (find_bytes(data, "ประเ") == some( 0u)); - assert (find_bytes(data, "ะเ") == some( 6u)); - assert (find_bytes(data, "中华") == some(27u)); - assert (find_bytes(data, "ไท华") == none); + assert (find(data, "") == some(0u)); + assert (find(data, "ประเ") == some( 0u)); + assert (find(data, "ะเ") == some( 6u)); + assert (find(data, "中华") == some(27u)); + assert (find(data, "ไท华") == none); } #[test] - fn test_find_from_bytes() { + fn test_find_from() { // byte positions - assert (find_from_bytes("", "", 0u, 0u) == some(0u)); + assert (find_from("", "", 0u, 0u) == some(0u)); let data = "abcabc"; - assert find_from_bytes(data, "ab", 0u, 6u) == some(0u); - assert find_from_bytes(data, "ab", 2u, 6u) == some(3u); - assert find_from_bytes(data, "ab", 2u, 4u) == none; + assert find_from(data, "ab", 0u, 6u) == some(0u); + assert find_from(data, "ab", 2u, 6u) == some(3u); + assert find_from(data, "ab", 2u, 4u) == none; let data = "ประเทศไทย中华Việt Nam"; data += data; - assert find_from_bytes(data, "", 0u, 43u) == some(0u); - assert find_from_bytes(data, "", 6u, 43u) == some(6u); + assert find_from(data, "", 0u, 43u) == some(0u); + assert find_from(data, "", 6u, 43u) == some(6u); - assert find_from_bytes(data, "ประ", 0u, 43u) == some( 0u); - assert find_from_bytes(data, "ทศไ", 0u, 43u) == some(12u); - assert find_from_bytes(data, "ย中", 0u, 43u) == some(24u); - assert find_from_bytes(data, "iệt", 0u, 43u) == some(34u); - assert find_from_bytes(data, "Nam", 0u, 43u) == some(40u); + assert find_from(data, "ประ", 0u, 43u) == some( 0u); + assert find_from(data, "ทศไ", 0u, 43u) == some(12u); + assert find_from(data, "ย中", 0u, 43u) == some(24u); + assert find_from(data, "iệt", 0u, 43u) == some(34u); + assert find_from(data, "Nam", 0u, 43u) == some(40u); - assert find_from_bytes(data, "ประ", 43u, 86u) == some(43u); - assert find_from_bytes(data, "ทศไ", 43u, 86u) == some(55u); - assert find_from_bytes(data, "ย中", 43u, 86u) == some(67u); - assert find_from_bytes(data, "iệt", 43u, 86u) == some(77u); - assert find_from_bytes(data, "Nam", 43u, 86u) == some(83u); + assert find_from(data, "ประ", 43u, 86u) == some(43u); + assert find_from(data, "ทศไ", 43u, 86u) == some(55u); + assert find_from(data, "ย中", 43u, 86u) == some(67u); + assert find_from(data, "iệt", 43u, 86u) == some(77u); + assert find_from(data, "Nam", 43u, 86u) == some(83u); } #[test] - fn test_find() { + fn test_find_chars() { // char positions - assert (find("banana", "apple pie") == none); - assert (find("", "") == some(0u)); + assert (find_chars("banana", "apple pie") == none); + assert (find_chars("", "") == some(0u)); let data = "ประเทศไทย中华Việt Nam"; - assert (find(data, "") == some(0u)); - assert (find(data, "ประเ") == some(0u)); - assert (find(data, "ะเ") == some(2u)); - assert (find(data, "中华") == some(9u)); - assert (find(data, "ไท华") == none); + assert (find_chars(data, "") == some(0u)); + assert (find_chars(data, "ประเ") == some(0u)); + assert (find_chars(data, "ะเ") == some(2u)); + assert (find_chars(data, "中华") == some(9u)); + assert (find_chars(data, "ไท华") == none); } #[test] @@ -1821,13 +1887,13 @@ mod tests { #[test] fn test_substr() { fn t(a: str, b: str, start: int) { - assert (eq(substr(a, start as uint, len_bytes(b)), b)); + assert (eq(substr(a, start as uint, len(b)), b)); } t("hello", "llo", 2); t("hello", "el", 1); assert "ะเทศไท" - == substr("ประเทศไทย中华Việt Nam", 2u, 6u); + == substr("ประเทศไทย中华Việt Nam", 6u, 18u); } #[test] @@ -1975,13 +2041,65 @@ mod tests { assert (eq("ab", slice("abc", 0u, 2u))); assert (eq("bc", slice("abc", 1u, 3u))); assert (eq("", slice("abc", 1u, 1u))); - assert (eq("\u65e5", slice("\u65e5\u672c", 0u, 1u))); + assert (eq("\u65e5", slice("\u65e5\u672c", 0u, 3u))); let data = "ประเทศไทย中华"; - assert (eq("ป", slice(data, 0u, 1u))); - assert (eq("ร", slice(data, 1u, 2u))); - assert (eq("华", slice(data, 10u, 11u))); + assert (eq("ป", slice(data, 0u, 3u))); + assert (eq("ร", slice(data, 3u, 6u))); assert (eq("", slice(data, 1u, 1u))); + assert (eq("华", slice(data, 30u, 33u))); + + fn a_million_letter_X() -> str { + let i = 0; + let rs = ""; + while i < 100000 { rs += "华华华华华华华华华华"; i += 1; } + ret rs; + } + fn half_a_million_letter_X() -> str { + let i = 0; + let rs = ""; + while i < 100000 { rs += "华华华华华"; i += 1; } + ret rs; + } + assert (eq(half_a_million_letter_X(), + slice(a_million_letter_X(), 0u, (3u * 500000u)))); + } + + #[test] + fn test_maybe_slice() { + let ss = "中华Việt Nam"; + + assert none == maybe_slice(ss, 0u, 2u); + assert none == maybe_slice(ss, 1u, 3u); + assert none == maybe_slice(ss, 1u, 2u); + assert some("华") == maybe_slice(ss, 3u, 6u); + assert some("Việt Nam") == maybe_slice(ss, 6u, 16u); + assert none == maybe_slice(ss, 4u, 16u); + + /* 0: 中 + 3: 华 + 6: V + 7: i + 8: ệ + 11: t + 12: + 13: N + 14: a + 15: m */ + } + + #[test] + fn test_slice_chars() { + assert (eq("ab", slice_chars("abc", 0u, 2u))); + assert (eq("bc", slice_chars("abc", 1u, 3u))); + assert (eq("", slice_chars("abc", 1u, 1u))); + assert (eq("\u65e5", slice_chars("\u65e5\u672c", 0u, 1u))); + + let data = "ประเทศไทย中华"; + assert (eq("ป", slice_chars(data, 0u, 1u))); + assert (eq("ร", slice_chars(data, 1u, 2u))); + assert (eq("华", slice_chars(data, 10u, 11u))); + assert (eq("", slice_chars(data, 1u, 1u))); fn a_million_letter_X() -> str { let i = 0; @@ -1996,7 +2114,7 @@ mod tests { ret rs; } assert (eq(half_a_million_letter_X(), - slice(a_million_letter_X(), 0u, 500000u))); + slice_chars(a_million_letter_X(), 0u, 500000u))); } #[test] @@ -2148,7 +2266,7 @@ mod tests { let v: [u8] = bytes(s1); let s2: str = from_bytes(v); let i: uint = 0u; - let n1: uint = len_bytes(s1); + let n1: uint = len(s1); let n2: uint = vec::len::(v); assert (n1 == n2); while i < n1 { diff --git a/src/libcore/u64.rs b/src/libcore/u64.rs index ac5f83cc415a4..77b658f066234 100644 --- a/src/libcore/u64.rs +++ b/src/libcore/u64.rs @@ -118,8 +118,8 @@ Function: from_str Parse a string as an unsigned integer. */ fn from_str(buf: str, radix: u64) -> option { - if str::len_bytes(buf) == 0u { ret none; } - let i = str::len_bytes(buf) - 1u; + if str::len(buf) == 0u { ret none; } + let i = str::len(buf) - 1u; let power = 1u64, n = 0u64; while true { alt char::to_digit(buf[i] as char, radix as uint) { diff --git a/src/libcore/uint.rs b/src/libcore/uint.rs index b5e425d2b1de5..230c6a2e90c55 100644 --- a/src/libcore/uint.rs +++ b/src/libcore/uint.rs @@ -248,7 +248,7 @@ fn to_str(num: uint, radix: uint) -> str { n /= radix; } let s1: str = ""; - let len: uint = str::len_bytes(s); + let len: uint = str::len(s); while len != 0u { len -= 1u; s1 += str::from_byte(s[len]); } ret s1; } diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs index 30abbb1e70aa9..3ba2f8b105ad0 100644 --- a/src/libstd/fs.rs +++ b/src/libstd/fs.rs @@ -93,8 +93,8 @@ fn connect(pre: path, post: path) -> path unsafe { let pre_ = pre; let post_ = post; let sep = os_fs::path_sep as u8; - let pre_len = str::len_bytes(pre); - let post_len = str::len_bytes(post); + let pre_len = str::len(pre); + let post_len = str::len(post); if pre_len > 1u && pre[pre_len-1u] == sep { str::unsafe::pop_byte(pre_); } if post_len > 1u && post[0] == sep { str::unsafe::shift_byte(post_); } ret pre_ + path_sep() + post_; @@ -170,7 +170,7 @@ Lists the contents of a directory. */ fn list_dir(p: path) -> [str] { let p = p; - let pl = str::len_bytes(p); + let pl = str::len(p); if pl == 0u || p[pl - 1u] as char != os_fs::path_sep { p += path_sep(); } let full_paths: [str] = []; for filename: str in os_fs::list_dir(p) { @@ -336,7 +336,7 @@ fn normalize(p: path) -> path { let s = reabsolute(p, s); let s = reterminate(p, s); - let s = if str::len_bytes(s) == 0u { + let s = if str::len(s) == 0u { "." } else { s @@ -403,7 +403,7 @@ fn normalize(p: path) -> path { } fn reterminate(orig: path, new: path) -> path { - let last = orig[str::len_bytes(orig) - 1u]; + let last = orig[str::len(orig) - 1u]; if last == os_fs::path_sep as u8 || last == os_fs::path_sep as u8 { ret new + path_sep(); diff --git a/src/libstd/getopts.rs b/src/libstd/getopts.rs index dba406c889ecd..7b409053cb0b1 100644 --- a/src/libstd/getopts.rs +++ b/src/libstd/getopts.rs @@ -141,7 +141,7 @@ of matches and a vector of free strings. type match = {opts: [opt], vals: [mutable [optval]], free: [str]}; fn is_arg(arg: str) -> bool { - ret str::len_bytes(arg) > 1u && arg[0] == '-' as u8; + ret str::len(arg) > 1u && arg[0] == '-' as u8; } fn name_str(nm: name) -> str { @@ -218,7 +218,7 @@ fn getopts(args: [str], opts: [opt]) -> result unsafe { let i = 0u; while i < l { let cur = args[i]; - let curlen = str::len_bytes(cur); + let curlen = str::len(cur); if !is_arg(cur) { free += [cur]; } else if str::eq(cur, "--") { @@ -229,7 +229,7 @@ fn getopts(args: [str], opts: [opt]) -> result unsafe { let names; let i_arg = option::none::; if cur[1] == '-' as u8 { - let tail = str::unsafe::slice_bytes(cur, 2u, curlen); + let tail = str::slice(cur, 2u, curlen); let tail_eq = str::splitn_char(tail, '=', 1u); if vec::len(tail_eq) <= 1u { names = [long(tail)]; diff --git a/src/libstd/json.rs b/src/libstd/json.rs index 127cd93952e78..7a888f250eda3 100644 --- a/src/libstd/json.rs +++ b/src/libstd/json.rs @@ -76,7 +76,7 @@ fn rest(s: str) -> str { fn from_str_str(s: str) -> (option, str) { let pos = 0u; - let len = str::len_bytes(s); + let len = str::len(s); let escape = false; let res = ""; @@ -172,7 +172,7 @@ fn from_str_dict(s: str) -> (option, str) { fn from_str_float(s: str) -> (option, str) { let pos = 0u; - let len = str::len_bytes(s); + let len = str::len(s); let res = 0f; let neg = 1f; @@ -205,7 +205,8 @@ fn from_str_float(s: str) -> (option, str) { } if pos == len { - ret (some(num(neg * res)), str::slice(s, pos, str::len(s))); + ret (some(num(neg * res)), + str::slice(s, pos, str::len(s))); } let dec = 1f; diff --git a/src/libstd/rope.rs b/src/libstd/rope.rs index 07ee837a9b918..10144cc973151 100644 --- a/src/libstd/rope.rs +++ b/src/libstd/rope.rs @@ -65,7 +65,7 @@ Performance notes: - the function runs in linear time. */ fn of_str(str: @str) -> rope { - ret of_substr(str, 0u, str::len_bytes(*str)); + ret of_substr(str, 0u, str::len(*str)); } /* @@ -93,7 +93,7 @@ Safety notes: */ fn of_substr(str: @str, byte_offset: uint, byte_len: uint) -> rope { if byte_len == 0u { ret node::empty; } - if byte_offset + byte_len > str::len_bytes(*str) { fail; } + if byte_offset + byte_len > str::len(*str) { fail; } ret node::content(node::of_substr(str, byte_offset, byte_len)); } @@ -721,7 +721,7 @@ mod node { the length of `str`. */ fn of_str(str: @str) -> @node { - ret of_substr(str, 0u, str::len_bytes(*str)); + ret of_substr(str, 0u, str::len(*str)); } /* @@ -768,7 +768,7 @@ mod node { */ fn of_substr_unsafer(str: @str, byte_start: uint, byte_len: uint, char_len: uint) -> @node { - assert(byte_start + byte_len <= str::len_bytes(*str)); + assert(byte_start + byte_len <= str::len(*str)); let candidate = @leaf({ byte_offset: byte_start, byte_len: byte_len, @@ -795,7 +795,7 @@ mod node { if i == 0u { first_leaf_char_len } else { hint_max_leaf_char_len }; let chunk_byte_len = - str::substr_len_bytes(*str, offset, chunk_char_len); + str::substr_len(*str, offset, chunk_char_len); nodes[i] = @leaf({ byte_offset: offset, byte_len: chunk_byte_len, @@ -1059,9 +1059,9 @@ mod node { ret node; } let byte_offset = - str::substr_len_bytes(*x.content, 0u, char_offset); + str::substr_len(*x.content, 0u, char_offset); let byte_len = - str::substr_len_bytes(*x.content, byte_offset, char_len); + str::substr_len(*x.content, byte_offset, char_len); ret @leaf({byte_offset: byte_offset, byte_len: byte_len, char_len: char_len, @@ -1345,7 +1345,7 @@ mod tests { fn aux(str: @mutable str, node: @node::node) unsafe { alt(*node) { node::leaf(x) { - *str += str::unsafe::slice_bytes( + *str += str::slice( *x.content, x.byte_offset, x.byte_offset + x.byte_len); } @@ -1373,7 +1373,7 @@ mod tests { let sample = @"0123456789ABCDE"; let r = of_str(sample); - assert char_len(r) == str::len(*sample); + assert char_len(r) == str::len_chars(*sample); assert rope_to_string(r) == *sample; } @@ -1384,11 +1384,11 @@ mod tests { while i < 10 { *buf = *buf + *buf; i+=1;} let sample = @*buf; let r = of_str(sample); - assert char_len(r) == str::len(*sample); + assert char_len(r) == str::len_chars(*sample); assert rope_to_string(r) == *sample; let string_iter = 0u; - let string_len = str::len_bytes(*sample); + let string_len = str::len(*sample); let rope_iter = iterator::char::start(r); let equal = true; let pos = 0u; @@ -1427,7 +1427,7 @@ mod tests { } } - assert len == str::len(*sample); + assert len == str::len_chars(*sample); } #[test] diff --git a/src/libstd/sha1.rs b/src/libstd/sha1.rs index c28d67a7526cd..9360a84fb97af 100644 --- a/src/libstd/sha1.rs +++ b/src/libstd/sha1.rs @@ -368,11 +368,11 @@ mod tests { // Test that it works when accepting the message in pieces for t: test in tests { - let len = str::len_bytes(t.input); + let len = str::len(t.input); let left = len; while left > 0u { let take = (left + 1u) / 2u; - sh.input_str(str::unsafe::slice_bytes(t.input, len - left, + sh.input_str(str::slice(t.input, len - left, take + len - left)); left = left - take; } diff --git a/src/rustdoc/markdown_pass.rs b/src/rustdoc/markdown_pass.rs index 1f82e892c0e51..d473f25d09093 100644 --- a/src/rustdoc/markdown_pass.rs +++ b/src/rustdoc/markdown_pass.rs @@ -56,10 +56,10 @@ fn should_write_modules_last() { fn d() { }" ); - let idx_a = option::get(str::find_bytes(markdown, "# Module `a`")); - let idx_b = option::get(str::find_bytes(markdown, "## Function `b`")); - let idx_c = option::get(str::find_bytes(markdown, "# Module `c`")); - let idx_d = option::get(str::find_bytes(markdown, "## Function `d`")); + let idx_a = option::get(str::find(markdown, "# Module `a`")); + let idx_b = option::get(str::find(markdown, "## Function `b`")); + let idx_c = option::get(str::find(markdown, "# Module `c`")); + let idx_d = option::get(str::find(markdown, "## Function `d`")); assert idx_b < idx_d; assert idx_d < idx_a; diff --git a/src/rustdoc/unindent_pass.rs b/src/rustdoc/unindent_pass.rs index b52316ccb15b9..59005d8266b1e 100644 --- a/src/rustdoc/unindent_pass.rs +++ b/src/rustdoc/unindent_pass.rs @@ -67,7 +67,7 @@ fn unindent(s: str) -> str { if str::is_whitespace(line) { line } else { - assert str::len_bytes(line) >= min_indent; + assert str::len(line) >= min_indent; str::slice(line, min_indent, str::len(line)) } }; diff --git a/src/test/bench/99bob-iter.rs b/src/test/bench/99bob-iter.rs index 0e338ae5c52fa..6abeb63471fd0 100644 --- a/src/test/bench/99bob-iter.rs +++ b/src/test/bench/99bob-iter.rs @@ -31,7 +31,7 @@ fn sub(t: str, n: int) -> str unsafe { 1 { ns = "1 bottle"; } _ { ns = int::to_str(n, 10u) + " bottles"; } } - while i < str::len_bytes(t) { + while i < str::len(t) { if t[i] == '#' as u8 { b += ns; } else { str::unsafe::push_byte(b, t[i]); } i += 1u; diff --git a/src/test/bench/99bob-simple.rs b/src/test/bench/99bob-simple.rs index 351f5463d5291..3fcf6abebb193 100644 --- a/src/test/bench/99bob-simple.rs +++ b/src/test/bench/99bob-simple.rs @@ -31,7 +31,7 @@ fn sub(t: str, n: int) -> str unsafe { 1 { ns = "1 bottle"; } _ { ns = int::to_str(n, 10u) + " bottles"; } } - while i < str::len_bytes(t) { + while i < str::len(t) { if t[i] == '#' as u8 { b += ns; } else { str::unsafe::push_byte(b, t[i]); } i += 1u; diff --git a/src/test/bench/shootout-fasta.rs b/src/test/bench/shootout-fasta.rs index ef82f0742de16..28431d3353f7e 100644 --- a/src/test/bench/shootout-fasta.rs +++ b/src/test/bench/shootout-fasta.rs @@ -49,26 +49,26 @@ fn make_random_fasta(id: str, desc: str, genelist: [aminoacids], n: int) { uint::range(0u, n as uint) {|_i| str::push_char(op, select_random(myrandom_next(rng, 100u32), genelist)); - if str::len_bytes(op) >= LINE_LENGTH() { + if str::len(op) >= LINE_LENGTH() { log(debug, op); op = ""; } } - if str::len_bytes(op) > 0u { log(debug, op); } + if str::len(op) > 0u { log(debug, op); } } fn make_repeat_fasta(id: str, desc: str, s: str, n: int) unsafe { log(debug, ">" + id + " " + desc); let op: str = ""; - let sl: uint = str::len_bytes(s); + let sl: uint = str::len(s); uint::range(0u, n as uint) {|i| str::unsafe::push_byte(op, s[i % sl]); - if str::len_bytes(op) >= LINE_LENGTH() { + if str::len(op) >= LINE_LENGTH() { log(debug, op); op = ""; } } - if str::len_bytes(op) > 0u { log(debug, op); } + if str::len(op) > 0u { log(debug, op); } } fn acid(ch: char, prob: u32) -> aminoacids { ret {ch: ch, prob: prob}; } diff --git a/src/test/run-pass/bind-native-fn.rs b/src/test/run-pass/bind-native-fn.rs index 88b0fbf4b3553..0d839d02200a7 100644 --- a/src/test/run-pass/bind-native-fn.rs +++ b/src/test/run-pass/bind-native-fn.rs @@ -13,7 +13,7 @@ native mod libc { fn main() { let s = "hello world\n"; let b = str::bytes(s); - let l = str::len_bytes(s); + let l = str::len(s); let b8 = unsafe { vec::unsafe::to_ptr(b) }; libc::write(0i32, b8, l); let a = bind libc::write(0i32, _, _); diff --git a/src/test/run-pass/string-self-append.rs b/src/test/run-pass/string-self-append.rs index 260b0194a0c94..c09d6c7cb56bc 100644 --- a/src/test/run-pass/string-self-append.rs +++ b/src/test/run-pass/string-self-append.rs @@ -7,8 +7,8 @@ fn main() { let i = 20; let expected_len = 1u; while i > 0 { - log(error, str::len_bytes(a)); - assert (str::len_bytes(a) == expected_len); + log(error, str::len(a)); + assert (str::len(a) == expected_len); a += a; i -= 1; expected_len *= 2u; diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs index cd591866c2488..2d38b9dd24584 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -7,8 +7,8 @@ fn main() { let chs: [char] = ['e', 'é', '€', 0x10000 as char]; let s: str = str::from_chars(chs); - assert (str::len_bytes(s) == 10u); - assert (str::len(s) == 4u); + assert (str::len(s) == 10u); + assert (str::len_chars(s) == 4u); assert (vec::len::(str::chars(s)) == 4u); assert (str::eq(str::from_chars(str::chars(s)), s)); assert (str::char_at(s, 0u) == 'e');