auto merge of #14613 : schmee/rust/utf16-iterator, r=huonw
Closes #14358. ~~The tests are not yet moved to `utf16_iter`, so this probably won't compile. I'm submitting this PR anyway so it can be reviewed and since it was mentioned in #14611.~~ EDIT: Tests now use the new iterator method, which is named `utf16_units` in the merged code. This deprecates `.to_utf16`. `x.to_utf16()` should be replaced by either `x.utf16_units().collect::<Vec<u16>>()` (the type annotation may be optional), or just `x.utf16_units()` directly, if it can be used in an iterator context. [breaking-change] cc @huonw
This commit is contained in:
commit
a345c54334
@ -803,15 +803,9 @@ pub trait StrAllocating: Str {
|
||||
}
|
||||
|
||||
/// Converts to a vector of `u16` encoded as UTF-16.
|
||||
#[deprecated = "use `utf16_units` instead"]
|
||||
fn to_utf16(&self) -> Vec<u16> {
|
||||
let me = self.as_slice();
|
||||
let mut u = Vec::new();
|
||||
for ch in me.chars() {
|
||||
let mut buf = [0u16, ..2];
|
||||
let n = ch.encode_utf16(buf /* as mut slice! */);
|
||||
u.push_all(buf.slice_to(n));
|
||||
}
|
||||
u
|
||||
self.as_slice().utf16_units().collect::<Vec<u16>>()
|
||||
}
|
||||
|
||||
/// Given a string, make a new string with repeated copies of it.
|
||||
@ -1619,14 +1613,17 @@ mod tests {
|
||||
|
||||
for p in pairs.iter() {
|
||||
let (s, u) = (*p).clone();
|
||||
assert!(is_utf16(u.as_slice()));
|
||||
assert_eq!(s.to_utf16(), u);
|
||||
let s_as_utf16 = s.as_slice().utf16_units().collect::<Vec<u16>>();
|
||||
let u_as_string = from_utf16(u.as_slice()).unwrap();
|
||||
|
||||
assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
|
||||
assert!(is_utf16(u.as_slice()));
|
||||
assert_eq!(s_as_utf16, u);
|
||||
|
||||
assert_eq!(u_as_string, s);
|
||||
assert_eq!(from_utf16_lossy(u.as_slice()), s);
|
||||
|
||||
assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
|
||||
assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
|
||||
assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
|
||||
assert_eq!(u_as_string.as_slice().utf16_units().collect::<Vec<u16>>(), u);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
|
||||
use mem;
|
||||
use char;
|
||||
use char::Char;
|
||||
use clone::Clone;
|
||||
use cmp;
|
||||
use cmp::{PartialEq, Eq};
|
||||
@ -24,7 +25,7 @@ use default::Default;
|
||||
use iter::{Filter, Map, Iterator};
|
||||
use iter::{DoubleEndedIterator, ExactSize};
|
||||
use iter::range;
|
||||
use num::Saturating;
|
||||
use num::{CheckedMul, Saturating};
|
||||
use option::{None, Option, Some};
|
||||
use raw::Repr;
|
||||
use slice::ImmutableVector;
|
||||
@ -557,6 +558,41 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// External iterator over the UTF-16 code units of a string.
|
||||
/// Use with the `std::iter` module.
///
/// Each `char` of the underlying string yields either one or two `u16`
/// values; when it yields two, the second one is buffered in `extra` and
/// handed out by the following call to `next`.
|
||||
#[deriving(Clone)]
|
||||
pub struct Utf16CodeUnits<'a> {
|
||||
// Iterator over the chars that still have to be encoded.
chars: Chars<'a>,
|
||||
// Second code unit of the most recently encoded char when that char
// needed two units; 0 means that nothing is buffered.
extra: u16
|
||||
}
|
||||
|
||||
impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<u16> {
|
||||
if self.extra != 0 {
|
||||
let tmp = self.extra;
|
||||
self.extra = 0;
|
||||
return Some(tmp);
|
||||
}
|
||||
|
||||
let mut buf = [0u16, ..2];
|
||||
self.chars.next().map(|ch| {
|
||||
let n = ch.encode_utf16(buf /* as mut slice! */);
|
||||
if n == 2 { self.extra = buf[1]; }
|
||||
buf[0]
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (uint, Option<uint>) {
|
||||
let (low, high) = self.chars.size_hint();
|
||||
// every char gets either one u16 or two u16,
|
||||
// so this iterator is between 1 or 2 times as
|
||||
// long as the underlying iterator.
|
||||
(low, high.and_then(|n| n.checked_mul(&2)))
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Section: Comparing strings
|
||||
*/
|
||||
@ -1609,6 +1645,9 @@ pub trait StrSlice<'a> {
|
||||
/// and that it is not reallocated (e.g. by pushing to the
|
||||
/// string).
|
||||
fn as_ptr(&self) -> *const u8;
|
||||
|
||||
/// Return an iterator of `u16` over the string encoded as UTF-16.
|
||||
fn utf16_units(&self) -> Utf16CodeUnits<'a>;
|
||||
}
|
||||
|
||||
impl<'a> StrSlice<'a> for &'a str {
|
||||
@ -1957,6 +1996,11 @@ impl<'a> StrSlice<'a> for &'a str {
|
||||
fn as_ptr(&self) -> *const u8 {
|
||||
self.repr().data
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn utf16_units(&self) -> Utf16CodeUnits<'a> {
|
||||
Utf16CodeUnits{ chars: self.chars(), extra: 0}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Default for &'a str {
|
||||
|
@ -70,6 +70,7 @@ extern "system" {
|
||||
|
||||
pub mod compat {
|
||||
use std::intrinsics::{atomic_store_relaxed, transmute};
|
||||
use std::iter::Iterator;
|
||||
use libc::types::os::arch::extra::{LPCWSTR, HMODULE, LPCSTR, LPVOID};
|
||||
|
||||
extern "system" {
|
||||
@ -82,7 +83,8 @@ pub mod compat {
|
||||
// layer (after it's loaded) shouldn't be any slower than a regular DLL
|
||||
// call.
|
||||
unsafe fn store_func(ptr: *mut uint, module: &str, symbol: &str, fallback: uint) {
|
||||
let module = module.to_utf16().append_one(0);
|
||||
let module: Vec<u16> = module.utf16_units().collect();
|
||||
let module = module.append_one(0);
|
||||
symbol.with_c_str(|symbol| {
|
||||
let handle = GetModuleHandleW(module.as_ptr());
|
||||
let func: uint = transmute(GetProcAddress(handle, symbol));
|
||||
|
@ -255,7 +255,7 @@ impl Drop for Inner {
|
||||
|
||||
pub fn to_utf16(s: &CString) -> IoResult<Vec<u16>> {
|
||||
match s.as_str() {
|
||||
Some(s) => Ok(s.to_utf16().append_one(0)),
|
||||
Some(s) => Ok(s.utf16_units().collect::<Vec<u16>>().append_one(0)),
|
||||
None => Err(IoError {
|
||||
code: libc::ERROR_INVALID_NAME as uint,
|
||||
extra: 0,
|
||||
|
@ -294,6 +294,8 @@ fn spawn_process_os(cfg: ProcessConfig,
|
||||
use libc::funcs::extra::msvcrt::get_osfhandle;
|
||||
|
||||
use std::mem;
|
||||
use std::iter::Iterator;
|
||||
use std::str::StrSlice;
|
||||
|
||||
if cfg.gid.is_some() || cfg.uid.is_some() {
|
||||
return Err(IoError {
|
||||
@ -328,7 +330,8 @@ fn spawn_process_os(cfg: ProcessConfig,
|
||||
lpSecurityDescriptor: ptr::mut_null(),
|
||||
bInheritHandle: 1,
|
||||
};
|
||||
let filename = "NUL".to_utf16().append_one(0);
|
||||
let filename: Vec<u16> = "NUL".utf16_units().collect();
|
||||
let filename = filename.append_one(0);
|
||||
*slot = libc::CreateFileW(filename.as_ptr(),
|
||||
access,
|
||||
libc::FILE_SHARE_READ |
|
||||
@ -371,7 +374,8 @@ fn spawn_process_os(cfg: ProcessConfig,
|
||||
|
||||
with_envp(cfg.env, |envp| {
|
||||
with_dirp(cfg.cwd, |dirp| {
|
||||
let mut cmd_str = cmd_str.to_utf16().append_one(0);
|
||||
let mut cmd_str: Vec<u16> = cmd_str.as_slice().utf16_units().collect();
|
||||
cmd_str = cmd_str.append_one(0);
|
||||
let created = CreateProcessW(ptr::null(),
|
||||
cmd_str.as_mut_ptr(),
|
||||
ptr::mut_null(),
|
||||
@ -770,7 +774,7 @@ fn with_envp<T>(env: Option<&[(CString, CString)]>, cb: |*mut c_void| -> T) -> T
|
||||
let kv = format!("{}={}",
|
||||
pair.ref0().as_str().unwrap(),
|
||||
pair.ref1().as_str().unwrap());
|
||||
blk.push_all(kv.to_utf16().as_slice());
|
||||
blk.extend(kv.as_slice().utf16_units());
|
||||
blk.push(0);
|
||||
}
|
||||
|
||||
@ -788,7 +792,9 @@ fn with_dirp<T>(d: Option<&CString>, cb: |*const u16| -> T) -> T {
|
||||
Some(dir) => {
|
||||
let dir_str = dir.as_str()
|
||||
.expect("expected workingdirectory to be utf-8 encoded");
|
||||
let dir_str = dir_str.to_utf16().append_one(0);
|
||||
let dir_str: Vec<u16> = dir_str.utf16_units().collect();
|
||||
let dir_str = dir_str.append_one(0);
|
||||
|
||||
cb(dir_str.as_ptr())
|
||||
},
|
||||
None => cb(ptr::null())
|
||||
|
@ -162,7 +162,8 @@ mod imp {
|
||||
|
||||
impl Lock {
|
||||
pub fn new(p: &Path) -> Lock {
|
||||
let p_16 = p.as_str().unwrap().to_utf16().append_one(0);
|
||||
let p_16: Vec<u16> = p.as_str().unwrap().utf16_units().collect();
|
||||
let p_16 = p_16.append_one(0);
|
||||
let handle = unsafe {
|
||||
libc::CreateFileW(p_16.as_ptr(),
|
||||
libc::FILE_GENERIC_READ |
|
||||
|
@ -281,19 +281,22 @@ pub mod dl {
|
||||
#[cfg(target_os = "win32")]
|
||||
pub mod dl {
|
||||
use c_str::ToCStr;
|
||||
use iter::Iterator;
|
||||
use libc;
|
||||
use os;
|
||||
use ptr;
|
||||
use result::{Ok, Err, Result};
|
||||
use str::StrAllocating;
|
||||
use str::StrSlice;
|
||||
use str;
|
||||
use string::String;
|
||||
use vec::Vec;
|
||||
|
||||
pub unsafe fn open_external<T: ToCStr>(filename: T) -> *mut u8 {
|
||||
// Windows expects Unicode data
|
||||
let filename_cstr = filename.to_c_str();
|
||||
let filename_str = str::from_utf8(filename_cstr.as_bytes_no_nul()).unwrap();
|
||||
let filename_str = filename_str.to_utf16().append_one(0);
|
||||
let filename_str: Vec<u16> = filename_str.utf16_units().collect();
|
||||
let filename_str = filename_str.append_one(0);
|
||||
LoadLibraryW(filename_str.as_ptr() as *const libc::c_void) as *mut u8
|
||||
}
|
||||
|
||||
|
@ -365,7 +365,8 @@ pub fn getenv(n: &str) -> Option<String> {
|
||||
unsafe {
|
||||
with_env_lock(|| {
|
||||
use os::win32::{fill_utf16_buf_and_decode};
|
||||
let n = n.to_utf16().append_one(0);
|
||||
let n: Vec<u16> = n.utf16_units().collect();
|
||||
let n = n.append_one(0);
|
||||
fill_utf16_buf_and_decode(|buf, sz| {
|
||||
libc::GetEnvironmentVariableW(n.as_ptr(), buf, sz)
|
||||
})
|
||||
@ -411,8 +412,10 @@ pub fn setenv(n: &str, v: &str) {
|
||||
|
||||
#[cfg(windows)]
|
||||
fn _setenv(n: &str, v: &str) {
|
||||
let n = n.to_utf16().append_one(0);
|
||||
let v = v.to_utf16().append_one(0);
|
||||
let n: Vec<u16> = n.utf16_units().collect();
|
||||
let n = n.append_one(0);
|
||||
let v: Vec<u16> = v.utf16_units().collect();
|
||||
let v = v.append_one(0);
|
||||
unsafe {
|
||||
with_env_lock(|| {
|
||||
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
|
||||
@ -437,7 +440,8 @@ pub fn unsetenv(n: &str) {
|
||||
|
||||
#[cfg(windows)]
|
||||
fn _unsetenv(n: &str) {
|
||||
let n = n.to_utf16().append_one(0);
|
||||
let n: Vec<u16> = n.utf16_units().collect();
|
||||
let n = n.append_one(0);
|
||||
unsafe {
|
||||
with_env_lock(|| {
|
||||
libc::SetEnvironmentVariableW(n.as_ptr(), ptr::null());
|
||||
@ -804,7 +808,7 @@ pub fn change_dir(p: &Path) -> bool {
|
||||
#[cfg(windows)]
|
||||
fn chdir(p: &Path) -> bool {
|
||||
let p = match p.as_str() {
|
||||
Some(s) => s.to_utf16().append_one(0),
|
||||
Some(s) => s.utf16_units().collect::<Vec<u16>>().append_one(0),
|
||||
None => return false,
|
||||
};
|
||||
unsafe {
|
||||
|
Loading…
Reference in New Issue
Block a user