auto merge of #14613 : schmee/rust/utf16-iterator, r=huonw

Closes #14358.

~~The tests are not yet moved to `utf16_units`, so this probably won't compile. I'm submitting this PR anyway so it can be reviewed and since it was mentioned in #14611.~~ EDIT: Tests now use `utf16_units`.

This deprecates `.to_utf16`. `x.to_utf16()` should be replaced by either `x.utf16_units().collect::<Vec<u16>>()` (the type annotation may be optional), or just `x.utf16_units()` directly, if it can be used in an iterator context.

[breaking-change]

cc @huonw
This commit is contained in:
bors 2014-06-30 19:26:35 +00:00
commit a345c54334
8 changed files with 85 additions and 28 deletions

View File

@ -803,15 +803,9 @@ pub trait StrAllocating: Str {
} }
/// Converts to a vector of `u16` encoded as UTF-16. /// Converts to a vector of `u16` encoded as UTF-16.
#[deprecated = "use `utf16_units` instead"]
fn to_utf16(&self) -> Vec<u16> { fn to_utf16(&self) -> Vec<u16> {
let me = self.as_slice(); self.as_slice().utf16_units().collect::<Vec<u16>>()
let mut u = Vec::new();
for ch in me.chars() {
let mut buf = [0u16, ..2];
let n = ch.encode_utf16(buf /* as mut slice! */);
u.push_all(buf.slice_to(n));
}
u
} }
/// Given a string, make a new string with repeated copies of it. /// Given a string, make a new string with repeated copies of it.
@ -1619,14 +1613,17 @@ mod tests {
for p in pairs.iter() { for p in pairs.iter() {
let (s, u) = (*p).clone(); let (s, u) = (*p).clone();
assert!(is_utf16(u.as_slice())); let s_as_utf16 = s.as_slice().utf16_units().collect::<Vec<u16>>();
assert_eq!(s.to_utf16(), u); let u_as_string = from_utf16(u.as_slice()).unwrap();
assert_eq!(from_utf16(u.as_slice()).unwrap(), s); assert!(is_utf16(u.as_slice()));
assert_eq!(s_as_utf16, u);
assert_eq!(u_as_string, s);
assert_eq!(from_utf16_lossy(u.as_slice()), s); assert_eq!(from_utf16_lossy(u.as_slice()), s);
assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s); assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u); assert_eq!(u_as_string.as_slice().utf16_units().collect::<Vec<u16>>(), u);
} }
} }

View File

@ -16,6 +16,7 @@
use mem; use mem;
use char; use char;
use char::Char;
use clone::Clone; use clone::Clone;
use cmp; use cmp;
use cmp::{PartialEq, Eq}; use cmp::{PartialEq, Eq};
@ -24,7 +25,7 @@ use default::Default;
use iter::{Filter, Map, Iterator}; use iter::{Filter, Map, Iterator};
use iter::{DoubleEndedIterator, ExactSize}; use iter::{DoubleEndedIterator, ExactSize};
use iter::range; use iter::range;
use num::Saturating; use num::{CheckedMul, Saturating};
use option::{None, Option, Some}; use option::{None, Option, Some};
use raw::Repr; use raw::Repr;
use slice::ImmutableVector; use slice::ImmutableVector;
@ -557,6 +558,41 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
} }
} }
/// External iterator over the UTF-16 code units of a string.
///
/// Each call to `next` yields one `u16`; characters that need two code
/// units are spread over two consecutive calls.
/// Use with the `std::iter` module.
#[deriving(Clone)]
pub struct Utf16CodeUnits<'a> {
    // Characters of the underlying string that have not been encoded yet.
    chars: Chars<'a>,
    // Second code unit of a two-unit character, held back until the next
    // call to `next`; 0 means nothing is being held.
    extra: u16,
}
impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
    /// Yields the next UTF-16 code unit, or `None` once both the held-back
    /// unit and the underlying characters are exhausted.
    #[inline]
    fn next(&mut self) -> Option<u16> {
        // A second code unit stashed by the previous call is emitted before
        // any further character is consumed. Zero doubles as the "nothing
        // pending" marker, which works because a trailing surrogate always
        // lies in 0xDC00..0xDFFF and so is never zero.
        if self.extra != 0 {
            let tmp = self.extra;
            self.extra = 0;
            return Some(tmp);
        }

        let mut buf = [0u16, ..2];
        self.chars.next().map(|ch| {
            let n = ch.encode_utf16(buf /* as mut slice! */);
            // Two units were written: return the first now and keep the
            // second for the following call.
            if n == 2 { self.extra = buf[1]; }
            buf[0]
        })
    }

    /// Bounds on the number of code units still to be yielded.
    #[inline]
    fn size_hint(&self) -> (uint, Option<uint>) {
        let (low, high) = self.chars.size_hint();
        // A held-back code unit will be yielded in addition to whatever the
        // remaining characters produce, so it counts towards both bounds.
        let pending: uint = if self.extra != 0 { 1 } else { 0 };
        // every char gets either one u16 or two u16,
        // so this iterator is between 1 or 2 times as
        // long as the underlying iterator.
        // A successful `checked_mul(&2)` gives an even value, hence strictly
        // below the (odd) maximum `uint`, so adding `pending` cannot overflow.
        (low.saturating_add(pending),
         high.and_then(|n| n.checked_mul(&2)).map(|n| n + pending))
    }
}
/* /*
Section: Comparing strings Section: Comparing strings
*/ */
@ -1609,6 +1645,9 @@ pub trait StrSlice<'a> {
/// and that it is not reallocated (e.g. by pushing to the /// and that it is not reallocated (e.g. by pushing to the
/// string). /// string).
fn as_ptr(&self) -> *const u8; fn as_ptr(&self) -> *const u8;
/// Return an iterator of `u16` over the string encoded as UTF-16.
///
/// The code units are produced on demand; collect the iterator when an
/// owned buffer is needed, e.g. `s.utf16_units().collect::<Vec<u16>>()`.
fn utf16_units(&self) -> Utf16CodeUnits<'a>;
} }
impl<'a> StrSlice<'a> for &'a str { impl<'a> StrSlice<'a> for &'a str {
@ -1957,6 +1996,11 @@ impl<'a> StrSlice<'a> for &'a str {
fn as_ptr(&self) -> *const u8 { fn as_ptr(&self) -> *const u8 {
self.repr().data self.repr().data
} }
// Starts from the string's character iterator with no code unit held back.
#[inline]
fn utf16_units(&self) -> Utf16CodeUnits<'a> {
    let chars = self.chars();
    Utf16CodeUnits { chars: chars, extra: 0 }
}
} }
impl<'a> Default for &'a str { impl<'a> Default for &'a str {

View File

@ -70,6 +70,7 @@ extern "system" {
pub mod compat { pub mod compat {
use std::intrinsics::{atomic_store_relaxed, transmute}; use std::intrinsics::{atomic_store_relaxed, transmute};
use std::iter::Iterator;
use libc::types::os::arch::extra::{LPCWSTR, HMODULE, LPCSTR, LPVOID}; use libc::types::os::arch::extra::{LPCWSTR, HMODULE, LPCSTR, LPVOID};
extern "system" { extern "system" {
@ -82,7 +83,8 @@ pub mod compat {
// layer (after it's loaded) shouldn't be any slower than a regular DLL // layer (after it's loaded) shouldn't be any slower than a regular DLL
// call. // call.
unsafe fn store_func(ptr: *mut uint, module: &str, symbol: &str, fallback: uint) { unsafe fn store_func(ptr: *mut uint, module: &str, symbol: &str, fallback: uint) {
let module = module.to_utf16().append_one(0); let module: Vec<u16> = module.utf16_units().collect();
let module = module.append_one(0);
symbol.with_c_str(|symbol| { symbol.with_c_str(|symbol| {
let handle = GetModuleHandleW(module.as_ptr()); let handle = GetModuleHandleW(module.as_ptr());
let func: uint = transmute(GetProcAddress(handle, symbol)); let func: uint = transmute(GetProcAddress(handle, symbol));

View File

@ -255,7 +255,7 @@ impl Drop for Inner {
pub fn to_utf16(s: &CString) -> IoResult<Vec<u16>> { pub fn to_utf16(s: &CString) -> IoResult<Vec<u16>> {
match s.as_str() { match s.as_str() {
Some(s) => Ok(s.to_utf16().append_one(0)), Some(s) => Ok(s.utf16_units().collect::<Vec<u16>>().append_one(0)),
None => Err(IoError { None => Err(IoError {
code: libc::ERROR_INVALID_NAME as uint, code: libc::ERROR_INVALID_NAME as uint,
extra: 0, extra: 0,

View File

@ -294,6 +294,8 @@ fn spawn_process_os(cfg: ProcessConfig,
use libc::funcs::extra::msvcrt::get_osfhandle; use libc::funcs::extra::msvcrt::get_osfhandle;
use std::mem; use std::mem;
use std::iter::Iterator;
use std::str::StrSlice;
if cfg.gid.is_some() || cfg.uid.is_some() { if cfg.gid.is_some() || cfg.uid.is_some() {
return Err(IoError { return Err(IoError {
@ -328,7 +330,8 @@ fn spawn_process_os(cfg: ProcessConfig,
lpSecurityDescriptor: ptr::mut_null(), lpSecurityDescriptor: ptr::mut_null(),
bInheritHandle: 1, bInheritHandle: 1,
}; };
let filename = "NUL".to_utf16().append_one(0); let filename: Vec<u16> = "NUL".utf16_units().collect();
let filename = filename.append_one(0);
*slot = libc::CreateFileW(filename.as_ptr(), *slot = libc::CreateFileW(filename.as_ptr(),
access, access,
libc::FILE_SHARE_READ | libc::FILE_SHARE_READ |
@ -371,7 +374,8 @@ fn spawn_process_os(cfg: ProcessConfig,
with_envp(cfg.env, |envp| { with_envp(cfg.env, |envp| {
with_dirp(cfg.cwd, |dirp| { with_dirp(cfg.cwd, |dirp| {
let mut cmd_str = cmd_str.to_utf16().append_one(0); let mut cmd_str: Vec<u16> = cmd_str.as_slice().utf16_units().collect();
cmd_str = cmd_str.append_one(0);
let created = CreateProcessW(ptr::null(), let created = CreateProcessW(ptr::null(),
cmd_str.as_mut_ptr(), cmd_str.as_mut_ptr(),
ptr::mut_null(), ptr::mut_null(),
@ -770,7 +774,7 @@ fn with_envp<T>(env: Option<&[(CString, CString)]>, cb: |*mut c_void| -> T) -> T
let kv = format!("{}={}", let kv = format!("{}={}",
pair.ref0().as_str().unwrap(), pair.ref0().as_str().unwrap(),
pair.ref1().as_str().unwrap()); pair.ref1().as_str().unwrap());
blk.push_all(kv.to_utf16().as_slice()); blk.extend(kv.as_slice().utf16_units());
blk.push(0); blk.push(0);
} }
@ -788,7 +792,9 @@ fn with_dirp<T>(d: Option<&CString>, cb: |*const u16| -> T) -> T {
Some(dir) => { Some(dir) => {
let dir_str = dir.as_str() let dir_str = dir.as_str()
.expect("expected workingdirectory to be utf-8 encoded"); .expect("expected workingdirectory to be utf-8 encoded");
let dir_str = dir_str.to_utf16().append_one(0); let dir_str: Vec<u16> = dir_str.utf16_units().collect();
let dir_str = dir_str.append_one(0);
cb(dir_str.as_ptr()) cb(dir_str.as_ptr())
}, },
None => cb(ptr::null()) None => cb(ptr::null())

View File

@ -162,7 +162,8 @@ mod imp {
impl Lock { impl Lock {
pub fn new(p: &Path) -> Lock { pub fn new(p: &Path) -> Lock {
let p_16 = p.as_str().unwrap().to_utf16().append_one(0); let p_16: Vec<u16> = p.as_str().unwrap().utf16_units().collect();
let p_16 = p_16.append_one(0);
let handle = unsafe { let handle = unsafe {
libc::CreateFileW(p_16.as_ptr(), libc::CreateFileW(p_16.as_ptr(),
libc::FILE_GENERIC_READ | libc::FILE_GENERIC_READ |

View File

@ -281,19 +281,22 @@ pub mod dl {
#[cfg(target_os = "win32")] #[cfg(target_os = "win32")]
pub mod dl { pub mod dl {
use c_str::ToCStr; use c_str::ToCStr;
use iter::Iterator;
use libc; use libc;
use os; use os;
use ptr; use ptr;
use result::{Ok, Err, Result}; use result::{Ok, Err, Result};
use str::StrAllocating; use str::StrSlice;
use str; use str;
use string::String; use string::String;
use vec::Vec;
pub unsafe fn open_external<T: ToCStr>(filename: T) -> *mut u8 { pub unsafe fn open_external<T: ToCStr>(filename: T) -> *mut u8 {
// Windows expects Unicode data // Windows expects Unicode data
let filename_cstr = filename.to_c_str(); let filename_cstr = filename.to_c_str();
let filename_str = str::from_utf8(filename_cstr.as_bytes_no_nul()).unwrap(); let filename_str = str::from_utf8(filename_cstr.as_bytes_no_nul()).unwrap();
let filename_str = filename_str.to_utf16().append_one(0); let filename_str: Vec<u16> = filename_str.utf16_units().collect();
let filename_str = filename_str.append_one(0);
LoadLibraryW(filename_str.as_ptr() as *const libc::c_void) as *mut u8 LoadLibraryW(filename_str.as_ptr() as *const libc::c_void) as *mut u8
} }

View File

@ -365,7 +365,8 @@ pub fn getenv(n: &str) -> Option<String> {
unsafe { unsafe {
with_env_lock(|| { with_env_lock(|| {
use os::win32::{fill_utf16_buf_and_decode}; use os::win32::{fill_utf16_buf_and_decode};
let n = n.to_utf16().append_one(0); let n: Vec<u16> = n.utf16_units().collect();
let n = n.append_one(0);
fill_utf16_buf_and_decode(|buf, sz| { fill_utf16_buf_and_decode(|buf, sz| {
libc::GetEnvironmentVariableW(n.as_ptr(), buf, sz) libc::GetEnvironmentVariableW(n.as_ptr(), buf, sz)
}) })
@ -411,8 +412,10 @@ pub fn setenv(n: &str, v: &str) {
#[cfg(windows)] #[cfg(windows)]
fn _setenv(n: &str, v: &str) { fn _setenv(n: &str, v: &str) {
let n = n.to_utf16().append_one(0); let n: Vec<u16> = n.utf16_units().collect();
let v = v.to_utf16().append_one(0); let n = n.append_one(0);
let v: Vec<u16> = v.utf16_units().collect();
let v = v.append_one(0);
unsafe { unsafe {
with_env_lock(|| { with_env_lock(|| {
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr()); libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
@ -437,7 +440,8 @@ pub fn unsetenv(n: &str) {
#[cfg(windows)] #[cfg(windows)]
fn _unsetenv(n: &str) { fn _unsetenv(n: &str) {
let n = n.to_utf16().append_one(0); let n: Vec<u16> = n.utf16_units().collect();
let n = n.append_one(0);
unsafe { unsafe {
with_env_lock(|| { with_env_lock(|| {
libc::SetEnvironmentVariableW(n.as_ptr(), ptr::null()); libc::SetEnvironmentVariableW(n.as_ptr(), ptr::null());
@ -804,7 +808,7 @@ pub fn change_dir(p: &Path) -> bool {
#[cfg(windows)] #[cfg(windows)]
fn chdir(p: &Path) -> bool { fn chdir(p: &Path) -> bool {
let p = match p.as_str() { let p = match p.as_str() {
Some(s) => s.to_utf16().append_one(0), Some(s) => s.utf16_units().collect::<Vec<u16>>().append_one(0),
None => return false, None => return false,
}; };
unsafe { unsafe {