Only keep one copy of the UTF8_CHAR_WIDTH table.
… instead of one in each of libcore and libstd_unicode. Move the `utf8_char_width` function to `core::str` under the `str_internals` unstable feature.
This commit is contained in:
parent
691eba1358
commit
031f9b15df
|
@ -54,6 +54,7 @@
|
|||
#![feature(slice_patterns)]
|
||||
#![feature(specialization)]
|
||||
#![feature(staged_api)]
|
||||
#![feature(str_internals)]
|
||||
#![feature(trusted_len)]
|
||||
#![feature(unicode)]
|
||||
#![feature(unique)]
|
||||
|
|
|
@ -62,9 +62,9 @@ use core::iter::{FromIterator, FusedIterator};
|
|||
use core::mem;
|
||||
use core::ops::{self, Add, AddAssign, Index, IndexMut};
|
||||
use core::ptr;
|
||||
use core::str as core_str;
|
||||
use core::str::pattern::Pattern;
|
||||
use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
|
||||
use std_unicode::str as unicode_str;
|
||||
|
||||
use borrow::{Cow, ToOwned};
|
||||
use range::RangeArgument;
|
||||
|
@ -575,7 +575,7 @@ impl String {
|
|||
if byte < 128 {
|
||||
// subseqidx handles this
|
||||
} else {
|
||||
let w = unicode_str::utf8_char_width(byte);
|
||||
let w = core_str::utf8_char_width(byte);
|
||||
|
||||
match w {
|
||||
2 => {
|
||||
|
|
|
@ -1352,6 +1352,13 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [
|
|||
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
|
||||
];
|
||||
|
||||
/// Given a first byte, determine how many bytes are in this UTF-8 character
|
||||
#[unstable(feature = "str_internals", issue = "0")]
|
||||
#[inline]
|
||||
pub fn utf8_char_width(b: u8) -> usize {
|
||||
return UTF8_CHAR_WIDTH[b as usize] as usize;
|
||||
}
|
||||
|
||||
/// Mask of the value bits of a continuation byte
|
||||
const CONT_MASK: u8 = 0b0011_1111;
|
||||
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte
|
||||
|
|
|
@ -256,7 +256,7 @@
|
|||
#![stable(feature = "rust1", since = "1.0.0")]
|
||||
|
||||
use cmp;
|
||||
use std_unicode::str as core_str;
|
||||
use core::str as core_str;
|
||||
use error as std_error;
|
||||
use fmt;
|
||||
use result;
|
||||
|
|
|
@ -47,7 +47,7 @@ pub mod char;
|
|||
// Public `str` module of this crate: it contains nothing but re-exports of
// items from the `u_str` module.
// NOTE(review): this view shows two `is_utf16` re-export lines (one of them
// also naming `utf8_char_width`); they look like the before/after sides of the
// change — confirm which one is current before relying on `utf8_char_width`
// being exported from here.
// NOTE(review): `allow(deprecated)` presumably covers a deprecated re-exported
// item — confirm against `u_str`.
#[allow(deprecated)]
|
||||
pub mod str {
|
||||
pub use u_str::{SplitWhitespace, UnicodeStr};
|
||||
pub use u_str::{is_utf16, utf8_char_width};
|
||||
pub use u_str::is_utf16;
|
||||
|
||||
pub use u_str::Utf16Encoder;
|
||||
}
|
||||
|
||||
|
|
|
@ -77,32 +77,6 @@ impl UnicodeStr for str {
|
|||
}
|
||||
}
|
||||
|
||||
// Width table for UTF-8 leading bytes, see https://tools.ietf.org/html/rfc3629
//
// Indexed by the value of a byte; each entry is the length in bytes of the
// character that byte introduces. Entries of 0 mark bytes for which the table
// records no character width.
static UTF8_CHAR_WIDTH: [u8; 256] = [
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x00..=0x0F
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x10..=0x1F
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x20..=0x2F
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x30..=0x3F
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x40..=0x4F
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x50..=0x5F
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x60..=0x6F
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x70..=0x7F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80..=0x8F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90..=0x9F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xA0..=0xAF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xB0..=0xBF
    0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xC0..=0xCF
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xD0..=0xDF
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xE0..=0xEF
    4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xF0..=0xFF
];

/// Looks up, in `UTF8_CHAR_WIDTH`, how many bytes make up the UTF-8 character
/// whose first byte is `b`.
///
/// The result is whatever the table stores for `b`, widened to `usize`.
#[inline]
pub fn utf8_char_width(b: u8) -> usize {
    // A `u8` index can never exceed the 256-entry table.
    let width = UTF8_CHAR_WIDTH[b as usize];
    width as usize
}
|
||||
|
||||
/// Determines if a vector of `u16` contains valid UTF-16
|
||||
pub fn is_utf16(v: &[u16]) -> bool {
|
||||
let mut it = v.iter();
|
||||
|
|
Loading…
Reference in New Issue