diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 60ee8cc04f7..e968899d21d 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -1680,7 +1680,7 @@ mod tests { fn test_chars_decoding() { let mut bytes = [0u8, ..4]; for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) { - let len = c.encode_utf8(bytes); + let len = c.encode_utf8(bytes).unwrap_or(0); let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap(); if Some(c) != s.chars().next() { fail!("character {:x}={} does not decode correctly", c as u32, c); @@ -1692,7 +1692,7 @@ mod tests { fn test_chars_rev_decoding() { let mut bytes = [0u8, ..4]; for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) { - let len = c.encode_utf8(bytes); + let len = c.encode_utf8(bytes).unwrap_or(0); let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap(); if Some(c) != s.chars().rev().next() { fail!("character {:x}={} does not decode correctly", c as u32, c); diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 3b9e2ac72dc..c31f4df31c4 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -503,7 +503,7 @@ impl String { data: self.vec.as_ptr().offset(cur_len as int), len: 4, }; - let used = ch.encode_utf8(mem::transmute(slice)); + let used = ch.encode_utf8(mem::transmute(slice)).unwrap_or(0); self.vec.set_len(cur_len + used); } } diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 63ffc4a046f..4e9a72c6af5 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -18,6 +18,7 @@ use mem::transmute; use option::{None, Option, Some}; use iter::range_step; +use collections::Collection; // UTF-8 ranges and tags for encoding characters static TAG_CONT: u8 = 0b1000_0000u8; @@ -27,7 +28,6 @@ static TAG_FOUR_B: u8 = 0b1111_0000u8; static MAX_ONE_B: u32 = 0x80u32; static MAX_TWO_B: u32 = 0x800u32; static MAX_THREE_B: u32 = 0x10000u32; -static MAX_FOUR_B: u32 = 0x200000u32; /* Lu 
Uppercase_Letter an uppercase letter @@ -217,14 +217,14 @@ pub fn escape_default(c: char, f: |char|) { } /// Returns the amount of bytes this `char` would need if encoded in UTF-8 +#[inline] pub fn len_utf8_bytes(c: char) -> uint { let code = c as u32; match () { _ if code < MAX_ONE_B => 1u, _ if code < MAX_TWO_B => 2u, _ if code < MAX_THREE_B => 3u, - _ if code < MAX_FOUR_B => 4u, - _ => fail!("invalid character!"), + _ => 4u, } } @@ -297,21 +297,19 @@ pub trait Char { /// UTF-8. fn len_utf8_bytes(&self) -> uint; - /// Encodes this character as UTF-8 into the provided byte buffer. + /// Encodes this character as UTF-8 into the provided byte buffer, + /// and then returns the number of bytes written. /// - /// The buffer must be at least 4 bytes long or a runtime failure may - /// occur. - /// - /// This will then return the number of bytes written to the slice. - fn encode_utf8(&self, dst: &mut [u8]) -> uint; + /// If the buffer is not large enough, nothing will be written into it + /// and a `None` will be returned. + fn encode_utf8(&self, dst: &mut [u8]) -> Option<uint>; - /// Encodes this character as UTF-16 into the provided `u16` buffer. + /// Encodes this character as UTF-16 into the provided `u16` buffer, + /// and then returns the number of `u16`s written. /// - /// The buffer must be at least 2 elements long or a runtime failure may - /// occur. - /// - /// This will then return the number of `u16`s written to the slice. - fn encode_utf16(&self, dst: &mut [u16]) -> uint; + /// If the buffer is not large enough, nothing will be written into it + /// and a `None` will be returned. 
+ fn encode_utf16(&self, dst: &mut [u16]) -> Option<uint>; } impl Char for char { @@ -325,45 +323,52 @@ impl Char for char { fn escape_default(&self, f: |char|) { escape_default(*self, f) } + #[inline] fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) } - fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint { + #[inline] + fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> Option<uint> { + // Marked #[inline] to allow llvm optimizing it away let code = *self as u32; - if code < MAX_ONE_B { + if code < MAX_ONE_B && dst.len() >= 1 { dst[0] = code as u8; - 1 - } else if code < MAX_TWO_B { + Some(1) + } else if code < MAX_TWO_B && dst.len() >= 2 { dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B; dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT; - 2 - } else if code < MAX_THREE_B { + Some(2) + } else if code < MAX_THREE_B && dst.len() >= 3 { dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B; dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT; dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT; - 3 - } else { + Some(3) + } else if dst.len() >= 4 { dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B; dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT; dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT; dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT; - 4 + Some(4) + } else { + None } } - fn encode_utf16(&self, dst: &mut [u16]) -> uint { + #[inline] + fn encode_utf16(&self, dst: &mut [u16]) -> Option<uint> { + // Marked #[inline] to allow llvm optimizing it away let mut ch = *self as u32; - if (ch & 0xFFFF_u32) == ch { + if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 { // The BMP falls through (assuming non-surrogate, as it should) - assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32); dst[0] = ch as u16; - 1 - } else { + Some(1) + } else if dst.len() >= 2 { // Supplementary planes break into surrogates. 
- assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32); ch -= 0x1_0000_u32; dst[0] = 0xD800_u16 | ((ch >> 10) as u16); dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16); - 2 + Some(2) + } else { + None } } } diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs index 942f7f8b710..cc152112917 100644 --- a/src/libcore/fmt/mod.rs +++ b/src/libcore/fmt/mod.rs @@ -364,7 +364,7 @@ impl<'a> Formatter<'a> { let write_prefix = |f: &mut Formatter| { for c in sign.move_iter() { let mut b = [0, ..4]; - let n = c.encode_utf8(b); + let n = c.encode_utf8(b).unwrap_or(0); try!(f.buf.write(b.slice_to(n))); } if prefixed { f.buf.write(prefix.as_bytes()) } @@ -464,7 +464,7 @@ impl<'a> Formatter<'a> { try!(f(self)); } let mut fill = [0u8, ..4]; - let len = self.fill.encode_utf8(fill); + let len = self.fill.encode_utf8(fill).unwrap_or(0); for _ in range(0, padding) { try!(self.buf.write(fill.slice_to(len))); } @@ -540,7 +540,7 @@ impl<'a, T: str::Str> String for T { impl Char for char { fn fmt(&self, f: &mut Formatter) -> Result { let mut utf8 = [0u8, ..4]; - let amt = self.encode_utf8(utf8); + let amt = self.encode_utf8(utf8).unwrap_or(0); let s: &str = unsafe { mem::transmute(utf8.slice_to(amt)) }; secret_string(&s, f) } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 4f7db7b41f3..095605326c7 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -30,7 +30,7 @@ use iter::range; use num::{CheckedMul, Saturating}; use option::{Option, None, Some}; use raw::Repr; -use slice::ImmutableSlice; +use slice::{ImmutableSlice, MutableSlice}; use slice; use uint; @@ -646,7 +646,7 @@ impl<'a> Iterator for Utf16CodeUnits<'a> { let mut buf = [0u16, ..2]; self.chars.next().map(|ch| { - let n = ch.encode_utf16(buf /* as mut slice! 
*/); + let n = ch.encode_utf16(buf.as_mut_slice()).unwrap_or(0); if n == 2 { self.extra = buf[1]; } buf[0] }) diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index ebc6e986228..8c3f4706a3c 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -173,7 +173,7 @@ fn test_escape_unicode() { fn test_encode_utf8() { fn check(input: char, expect: &[u8]) { let mut buf = [0u8, ..4]; - let n = input.encode_utf8(buf /* as mut slice! */); + let n = input.encode_utf8(buf.as_mut_slice()).unwrap_or(0); assert_eq!(buf.slice_to(n), expect); } @@ -187,7 +187,7 @@ fn test_encode_utf8() { fn test_encode_utf16() { fn check(input: char, expect: &[u16]) { let mut buf = [0u16, ..2]; - let n = input.encode_utf16(buf /* as mut slice! */); + let n = input.encode_utf16(buf.as_mut_slice()).unwrap_or(0); assert_eq!(buf.slice_to(n), expect); } diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index c95dd8618ed..e93a958acc6 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -1110,7 +1110,7 @@ pub trait Writer { #[inline] fn write_char(&mut self, c: char) -> IoResult<()> { let mut buf = [0u8, ..4]; - let n = c.encode_utf8(buf.as_mut_slice()); + let n = c.encode_utf8(buf.as_mut_slice()).unwrap_or(0); self.write(buf.slice_to(n)) }