Implement _str.len() to return the number of bytes, rename it to byte_len(), and add a test.
This commit is contained in:
Jeffrey Yasskin 2010-07-25 00:36:03 -07:00 committed by Graydon Hoare
parent 581a95a804
commit 3f6e8ffe64
4 changed files with 33 additions and 5 deletions

View File

@ -506,6 +506,7 @@ TEST_XFAILS_LLVM := $(TASK_XFAILS) \
str-append.rs \
str-concat.rs \
str-idx.rs \
str-lib.rs \
tag.rs \
tail-cps.rs \
tail-direct.rs \

View File

@ -3,7 +3,7 @@ import rustrt.sbuf;
// Declarations of the string-related functions this module reaches through
// the native "rust" module. The wrappers in this file forward to them as
// rustrt.<name>(...).
native "rust" mod rustrt {
// Declared here without a definition; it is the return type of str_buf.
type sbuf;
fn str_buf(str s) -> sbuf;
// NOTE(review): str_len and str_byte_len both appear in this view; this
// looks like the pre-rename declaration shown next to its replacement.
// Confirm which of the two lines is actually present in the file.
fn str_len(str s) -> uint;
// Byte length of s; wrapped by byte_len in this file.
fn str_byte_len(str s) -> uint;
// Wrapped by alloc, which passes its n_bytes argument straight through.
fn str_alloc(uint n_bytes) -> str;
fn refcount[T](str s) -> uint;
}
@ -13,7 +13,7 @@ fn is_utf8(vec[u8] v) -> bool {
}
fn is_ascii(str s) -> bool {
let uint i = len(s);
let uint i = byte_len(s);
while (i > 0u) {
i -= 1u;
if ((s.(i) & 0x80u8) != 0u8) {
@ -27,8 +27,13 @@ fn alloc(uint n_bytes) -> str {
ret rustrt.str_alloc(n_bytes);
}
fn len(str s) -> uint {
ret rustrt.str_len(s);
// Returns the number of bytes (a.k.a. UTF-8 code units) in s.
// Contrast with a function that would return the number of code
// points (char's), combining character sequences, words, etc. See
// http://icu-project.org/apiref/icu4c/classBreakIterator.html for a
// way to implement those.
fn byte_len(str s) -> uint {
ret rustrt.str_byte_len(s);
}
fn buf(str s) -> sbuf {
@ -39,5 +44,5 @@ fn bytes(&str s) -> vec[u8] {
fn ith(str s, uint i) -> u8 {
ret s.(i);
}
ret _vec.init_fn[u8](bind ith(s, _), _str.len(s));
ret _vec.init_fn[u8](bind ith(s, _), _str.byte_len(s));
}

View File

@ -115,6 +115,12 @@ str_buf(rust_task *task, rust_str *s)
return (char const *)&s->data[0];
}
// C-linkage function whose name matches the str_byte_len declaration in the
// Rust-side native module. Returns the string's fill count minus one, so
// the trailing '\0' is not included in the reported length. The task
// argument is not used in the body.
// NOTE(review): assumes s->fill is always at least 1 (terminator present);
// if fill is unsigned, a fill of 0 would wrap around -- confirm the
// invariant where rust_str is defined.
extern "C" CDECL size_t
str_byte_len(rust_task *task, rust_str *s)
{
return s->fill - 1; // -1 for the '\0' terminator.
}
extern "C" CDECL void *
vec_buf(rust_task *task, type_desc *ty, rust_vec *v, size_t offset)
{

View File

@ -0,0 +1,16 @@
use std;
import std._str;
fn test_bytes_len() {
check (_str.byte_len("") == 0u);
check (_str.byte_len("hello world") == 11u);
check (_str.byte_len("\x63") == 1u);
check (_str.byte_len("\xa2") == 2u);
check (_str.byte_len("\u03c0") == 2u);
check (_str.byte_len("\u2620") == 3u);
check (_str.byte_len("\U0001d11e") == 4u);
}
// Program entry point: runs the byte-length test defined above.
fn main() {
test_bytes_len();
}