Only keep one copy of the UTF8_CHAR_WIDTH table.

… instead of one of each of libcore and libstd_unicode.

Move the `utf8_char_width` function to `core::str`
under the `str_internals` unstable feature.
This commit is contained in:
Simon Sapin 2017-03-01 22:41:44 +01:00
parent 691eba1358
commit 031f9b15df
6 changed files with 12 additions and 30 deletions

View File

@ -54,6 +54,7 @@
#![feature(slice_patterns)]
#![feature(specialization)]
#![feature(staged_api)]
#![feature(str_internals)]
#![feature(trusted_len)]
#![feature(unicode)]
#![feature(unique)]

View File

@ -62,9 +62,9 @@ use core::iter::{FromIterator, FusedIterator};
use core::mem;
use core::ops::{self, Add, AddAssign, Index, IndexMut};
use core::ptr;
use core::str as core_str;
use core::str::pattern::Pattern;
use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
use std_unicode::str as unicode_str;
use borrow::{Cow, ToOwned};
use range::RangeArgument;
@ -575,7 +575,7 @@ impl String {
if byte < 128 {
// subseqidx handles this
} else {
let w = unicode_str::utf8_char_width(byte);
let w = core_str::utf8_char_width(byte);
match w {
2 => {

View File

@ -1352,6 +1352,13 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
];
/// Given a first byte, determine how many bytes are in this UTF-8 character
#[unstable(feature = "str_internals", issue = "0")]
#[inline]
pub fn utf8_char_width(b: u8) -> usize {
return UTF8_CHAR_WIDTH[b as usize] as usize;
}
/// Mask of the value bits of a continuation byte
const CONT_MASK: u8 = 0b0011_1111;
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte

View File

@ -256,7 +256,7 @@
#![stable(feature = "rust1", since = "1.0.0")]
use cmp;
use std_unicode::str as core_str;
use core::str as core_str;
use error as std_error;
use fmt;
use result;

View File

@ -47,7 +47,7 @@ pub mod char;
#[allow(deprecated)]
pub mod str {
pub use u_str::{SplitWhitespace, UnicodeStr};
pub use u_str::{is_utf16, utf8_char_width};
pub use u_str::is_utf16;
pub use u_str::Utf16Encoder;
}

View File

@ -77,32 +77,6 @@ impl UnicodeStr for str {
}
}
// https://tools.ietf.org/html/rfc3629
static UTF8_CHAR_WIDTH: [u8; 256] = [
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
];
/// Given a first byte, determine how many bytes are in this UTF-8 character
#[inline]
pub fn utf8_char_width(b: u8) -> usize {
return UTF8_CHAR_WIDTH[b as usize] as usize;
}
/// Determines if a vector of `u16` contains valid UTF-16
pub fn is_utf16(v: &[u16]) -> bool {
let mut it = v.iter();