Only keep one copy of the UTF8_CHAR_WIDTH table.

… instead of one in each of libcore and libstd_unicode. Move the `utf8_char_width` function to `core::str` under the `str_internals` unstable feature.
2017-03-01 22:41:44 +01:00 · 2017-03-01 22:41:44 +01:00 · 031f9b15df
parent 691eba1358
commit 031f9b15df
6 changed files with 12 additions and 30 deletions
--- a/src/libcollections/lib.rs
+++ b/src/libcollections/lib.rs
@@ -54,6 +54,7 @@
 #![feature(slice_patterns)]
 #![feature(specialization)]
 #![feature(staged_api)]
+#![feature(str_internals)]
 #![feature(trusted_len)]
 #![feature(unicode)]
 #![feature(unique)]
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -62,9 +62,9 @@ use core::iter::{FromIterator, FusedIterator};
 use core::mem;
 use core::ops::{self, Add, AddAssign, Index, IndexMut};
 use core::ptr;
+use core::str as core_str;
 use core::str::pattern::Pattern;
 use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
-use std_unicode::str as unicode_str;

 use borrow::{Cow, ToOwned};
 use range::RangeArgument;
@@ -575,7 +575,7 @@ impl String {
            if byte < 128 {
                // subseqidx handles this
            } else {
-                let w = unicode_str::utf8_char_width(byte);
+                let w = core_str::utf8_char_width(byte);

                match w {
                    2 => {
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -1352,6 +1352,13 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [
 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
 ];

+/// Given a first byte, determine how many bytes are in this UTF-8 character
+#[unstable(feature = "str_internals", issue = "0")]
+#[inline]
+pub fn utf8_char_width(b: u8) -> usize {
+    return UTF8_CHAR_WIDTH[b as usize] as usize;
+}
+
 /// Mask of the value bits of a continuation byte
 const CONT_MASK: u8 = 0b0011_1111;
 /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte
--- a/src/libstd/io/mod.rs
+++ b/src/libstd/io/mod.rs
@@ -256,7 +256,7 @@
 #![stable(feature = "rust1", since = "1.0.0")]

 use cmp;
-use std_unicode::str as core_str;
+use core::str as core_str;
 use error as std_error;
 use fmt;
 use result;
--- a/src/libstd_unicode/lib.rs
+++ b/src/libstd_unicode/lib.rs
@@ -47,7 +47,7 @@ pub mod char;
 #[allow(deprecated)]
 pub mod str {
    pub use u_str::{SplitWhitespace, UnicodeStr};
-    pub use u_str::{is_utf16, utf8_char_width};
+    pub use u_str::is_utf16;
    pub use u_str::Utf16Encoder;
 }

--- a/src/libstd_unicode/u_str.rs
+++ b/src/libstd_unicode/u_str.rs
@@ -77,32 +77,6 @@ impl UnicodeStr for str {
    }
 }

-// https://tools.ietf.org/html/rfc3629
-static UTF8_CHAR_WIDTH: [u8; 256] = [
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
-0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
-4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
-];
-
-/// Given a first byte, determine how many bytes are in this UTF-8 character
-#[inline]
-pub fn utf8_char_width(b: u8) -> usize {
-    return UTF8_CHAR_WIDTH[b as usize] as usize;
-}
-
 /// Determines if a vector of `u16` contains valid UTF-16
 pub fn is_utf16(v: &[u16]) -> bool {
    let mut it = v.iter();