auto merge of #14613 : schmee/rust/utf16-iterator, r=huonw
Closes #14358. ~~The tests are not yet moved to `utf16_units`, so this probably won't compile. I'm submitting this PR anyway so it can be reviewed and since it was mentioned in #14611.~~ EDIT: Tests now use `utf16_units`. This deprecates `.to_utf16`. `x.to_utf16()` should be replaced by either `x.utf16_units().collect::<Vec<u16>>()` (the type annotation may be optional), or just `x.utf16_units()` directly, if it can be used in an iterator context. [breaking-change] cc @huonw
This commit is contained in:
commit
a345c54334
@ -803,15 +803,9 @@ pub trait StrAllocating: Str {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Converts to a vector of `u16` encoded as UTF-16.
|
/// Converts to a vector of `u16` encoded as UTF-16.
|
||||||
|
#[deprecated = "use `utf16_units` instead"]
|
||||||
fn to_utf16(&self) -> Vec<u16> {
|
fn to_utf16(&self) -> Vec<u16> {
|
||||||
let me = self.as_slice();
|
self.as_slice().utf16_units().collect::<Vec<u16>>()
|
||||||
let mut u = Vec::new();
|
|
||||||
for ch in me.chars() {
|
|
||||||
let mut buf = [0u16, ..2];
|
|
||||||
let n = ch.encode_utf16(buf /* as mut slice! */);
|
|
||||||
u.push_all(buf.slice_to(n));
|
|
||||||
}
|
|
||||||
u
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Given a string, make a new string with repeated copies of it.
|
/// Given a string, make a new string with repeated copies of it.
|
||||||
@ -1619,14 +1613,17 @@ mod tests {
|
|||||||
|
|
||||||
for p in pairs.iter() {
|
for p in pairs.iter() {
|
||||||
let (s, u) = (*p).clone();
|
let (s, u) = (*p).clone();
|
||||||
assert!(is_utf16(u.as_slice()));
|
let s_as_utf16 = s.as_slice().utf16_units().collect::<Vec<u16>>();
|
||||||
assert_eq!(s.to_utf16(), u);
|
let u_as_string = from_utf16(u.as_slice()).unwrap();
|
||||||
|
|
||||||
assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
|
assert!(is_utf16(u.as_slice()));
|
||||||
|
assert_eq!(s_as_utf16, u);
|
||||||
|
|
||||||
|
assert_eq!(u_as_string, s);
|
||||||
assert_eq!(from_utf16_lossy(u.as_slice()), s);
|
assert_eq!(from_utf16_lossy(u.as_slice()), s);
|
||||||
|
|
||||||
assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
|
assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
|
||||||
assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
|
assert_eq!(u_as_string.as_slice().utf16_units().collect::<Vec<u16>>(), u);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
use mem;
|
use mem;
|
||||||
use char;
|
use char;
|
||||||
|
use char::Char;
|
||||||
use clone::Clone;
|
use clone::Clone;
|
||||||
use cmp;
|
use cmp;
|
||||||
use cmp::{PartialEq, Eq};
|
use cmp::{PartialEq, Eq};
|
||||||
@ -24,7 +25,7 @@ use default::Default;
|
|||||||
use iter::{Filter, Map, Iterator};
|
use iter::{Filter, Map, Iterator};
|
||||||
use iter::{DoubleEndedIterator, ExactSize};
|
use iter::{DoubleEndedIterator, ExactSize};
|
||||||
use iter::range;
|
use iter::range;
|
||||||
use num::Saturating;
|
use num::{CheckedMul, Saturating};
|
||||||
use option::{None, Option, Some};
|
use option::{None, Option, Some};
|
||||||
use raw::Repr;
|
use raw::Repr;
|
||||||
use slice::ImmutableVector;
|
use slice::ImmutableVector;
|
||||||
@ -557,6 +558,41 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// External iterator for a string's UTF-16 code units.
|
||||||
|
/// Use with the `std::iter` module.
|
||||||
|
#[deriving(Clone)]
|
||||||
|
pub struct Utf16CodeUnits<'a> {
|
||||||
|
chars: Chars<'a>,
|
||||||
|
extra: u16
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
|
||||||
|
#[inline]
|
||||||
|
fn next(&mut self) -> Option<u16> {
|
||||||
|
if self.extra != 0 {
|
||||||
|
let tmp = self.extra;
|
||||||
|
self.extra = 0;
|
||||||
|
return Some(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut buf = [0u16, ..2];
|
||||||
|
self.chars.next().map(|ch| {
|
||||||
|
let n = ch.encode_utf16(buf /* as mut slice! */);
|
||||||
|
if n == 2 { self.extra = buf[1]; }
|
||||||
|
buf[0]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn size_hint(&self) -> (uint, Option<uint>) {
|
||||||
|
let (low, high) = self.chars.size_hint();
|
||||||
|
// every char gets either one u16 or two u16,
|
||||||
|
// so this iterator is between 1 and 2 times as
|
||||||
|
// long as the underlying iterator.
|
||||||
|
(low, high.and_then(|n| n.checked_mul(&2)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Section: Comparing strings
|
Section: Comparing strings
|
||||||
*/
|
*/
|
||||||
@ -1609,6 +1645,9 @@ pub trait StrSlice<'a> {
|
|||||||
/// and that it is not reallocated (e.g. by pushing to the
|
/// and that it is not reallocated (e.g. by pushing to the
|
||||||
/// string).
|
/// string).
|
||||||
fn as_ptr(&self) -> *const u8;
|
fn as_ptr(&self) -> *const u8;
|
||||||
|
|
||||||
|
/// Return an iterator of `u16` over the string encoded as UTF-16.
|
||||||
|
fn utf16_units(&self) -> Utf16CodeUnits<'a>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> StrSlice<'a> for &'a str {
|
impl<'a> StrSlice<'a> for &'a str {
|
||||||
@ -1957,6 +1996,11 @@ impl<'a> StrSlice<'a> for &'a str {
|
|||||||
fn as_ptr(&self) -> *const u8 {
|
fn as_ptr(&self) -> *const u8 {
|
||||||
self.repr().data
|
self.repr().data
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn utf16_units(&self) -> Utf16CodeUnits<'a> {
|
||||||
|
Utf16CodeUnits{ chars: self.chars(), extra: 0}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Default for &'a str {
|
impl<'a> Default for &'a str {
|
||||||
|
@ -70,6 +70,7 @@ extern "system" {
|
|||||||
|
|
||||||
pub mod compat {
|
pub mod compat {
|
||||||
use std::intrinsics::{atomic_store_relaxed, transmute};
|
use std::intrinsics::{atomic_store_relaxed, transmute};
|
||||||
|
use std::iter::Iterator;
|
||||||
use libc::types::os::arch::extra::{LPCWSTR, HMODULE, LPCSTR, LPVOID};
|
use libc::types::os::arch::extra::{LPCWSTR, HMODULE, LPCSTR, LPVOID};
|
||||||
|
|
||||||
extern "system" {
|
extern "system" {
|
||||||
@ -82,7 +83,8 @@ pub mod compat {
|
|||||||
// layer (after it's loaded) shouldn't be any slower than a regular DLL
|
// layer (after it's loaded) shouldn't be any slower than a regular DLL
|
||||||
// call.
|
// call.
|
||||||
unsafe fn store_func(ptr: *mut uint, module: &str, symbol: &str, fallback: uint) {
|
unsafe fn store_func(ptr: *mut uint, module: &str, symbol: &str, fallback: uint) {
|
||||||
let module = module.to_utf16().append_one(0);
|
let module: Vec<u16> = module.utf16_units().collect();
|
||||||
|
let module = module.append_one(0);
|
||||||
symbol.with_c_str(|symbol| {
|
symbol.with_c_str(|symbol| {
|
||||||
let handle = GetModuleHandleW(module.as_ptr());
|
let handle = GetModuleHandleW(module.as_ptr());
|
||||||
let func: uint = transmute(GetProcAddress(handle, symbol));
|
let func: uint = transmute(GetProcAddress(handle, symbol));
|
||||||
|
@ -255,7 +255,7 @@ impl Drop for Inner {
|
|||||||
|
|
||||||
pub fn to_utf16(s: &CString) -> IoResult<Vec<u16>> {
|
pub fn to_utf16(s: &CString) -> IoResult<Vec<u16>> {
|
||||||
match s.as_str() {
|
match s.as_str() {
|
||||||
Some(s) => Ok(s.to_utf16().append_one(0)),
|
Some(s) => Ok(s.utf16_units().collect::<Vec<u16>>().append_one(0)),
|
||||||
None => Err(IoError {
|
None => Err(IoError {
|
||||||
code: libc::ERROR_INVALID_NAME as uint,
|
code: libc::ERROR_INVALID_NAME as uint,
|
||||||
extra: 0,
|
extra: 0,
|
||||||
|
@ -294,6 +294,8 @@ fn spawn_process_os(cfg: ProcessConfig,
|
|||||||
use libc::funcs::extra::msvcrt::get_osfhandle;
|
use libc::funcs::extra::msvcrt::get_osfhandle;
|
||||||
|
|
||||||
use std::mem;
|
use std::mem;
|
||||||
|
use std::iter::Iterator;
|
||||||
|
use std::str::StrSlice;
|
||||||
|
|
||||||
if cfg.gid.is_some() || cfg.uid.is_some() {
|
if cfg.gid.is_some() || cfg.uid.is_some() {
|
||||||
return Err(IoError {
|
return Err(IoError {
|
||||||
@ -328,7 +330,8 @@ fn spawn_process_os(cfg: ProcessConfig,
|
|||||||
lpSecurityDescriptor: ptr::mut_null(),
|
lpSecurityDescriptor: ptr::mut_null(),
|
||||||
bInheritHandle: 1,
|
bInheritHandle: 1,
|
||||||
};
|
};
|
||||||
let filename = "NUL".to_utf16().append_one(0);
|
let filename: Vec<u16> = "NUL".utf16_units().collect();
|
||||||
|
let filename = filename.append_one(0);
|
||||||
*slot = libc::CreateFileW(filename.as_ptr(),
|
*slot = libc::CreateFileW(filename.as_ptr(),
|
||||||
access,
|
access,
|
||||||
libc::FILE_SHARE_READ |
|
libc::FILE_SHARE_READ |
|
||||||
@ -371,7 +374,8 @@ fn spawn_process_os(cfg: ProcessConfig,
|
|||||||
|
|
||||||
with_envp(cfg.env, |envp| {
|
with_envp(cfg.env, |envp| {
|
||||||
with_dirp(cfg.cwd, |dirp| {
|
with_dirp(cfg.cwd, |dirp| {
|
||||||
let mut cmd_str = cmd_str.to_utf16().append_one(0);
|
let mut cmd_str: Vec<u16> = cmd_str.as_slice().utf16_units().collect();
|
||||||
|
cmd_str = cmd_str.append_one(0);
|
||||||
let created = CreateProcessW(ptr::null(),
|
let created = CreateProcessW(ptr::null(),
|
||||||
cmd_str.as_mut_ptr(),
|
cmd_str.as_mut_ptr(),
|
||||||
ptr::mut_null(),
|
ptr::mut_null(),
|
||||||
@ -770,7 +774,7 @@ fn with_envp<T>(env: Option<&[(CString, CString)]>, cb: |*mut c_void| -> T) -> T
|
|||||||
let kv = format!("{}={}",
|
let kv = format!("{}={}",
|
||||||
pair.ref0().as_str().unwrap(),
|
pair.ref0().as_str().unwrap(),
|
||||||
pair.ref1().as_str().unwrap());
|
pair.ref1().as_str().unwrap());
|
||||||
blk.push_all(kv.to_utf16().as_slice());
|
blk.extend(kv.as_slice().utf16_units());
|
||||||
blk.push(0);
|
blk.push(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -788,7 +792,9 @@ fn with_dirp<T>(d: Option<&CString>, cb: |*const u16| -> T) -> T {
|
|||||||
Some(dir) => {
|
Some(dir) => {
|
||||||
let dir_str = dir.as_str()
|
let dir_str = dir.as_str()
|
||||||
.expect("expected workingdirectory to be utf-8 encoded");
|
.expect("expected workingdirectory to be utf-8 encoded");
|
||||||
let dir_str = dir_str.to_utf16().append_one(0);
|
let dir_str: Vec<u16> = dir_str.utf16_units().collect();
|
||||||
|
let dir_str = dir_str.append_one(0);
|
||||||
|
|
||||||
cb(dir_str.as_ptr())
|
cb(dir_str.as_ptr())
|
||||||
},
|
},
|
||||||
None => cb(ptr::null())
|
None => cb(ptr::null())
|
||||||
|
@ -162,7 +162,8 @@ mod imp {
|
|||||||
|
|
||||||
impl Lock {
|
impl Lock {
|
||||||
pub fn new(p: &Path) -> Lock {
|
pub fn new(p: &Path) -> Lock {
|
||||||
let p_16 = p.as_str().unwrap().to_utf16().append_one(0);
|
let p_16: Vec<u16> = p.as_str().unwrap().utf16_units().collect();
|
||||||
|
let p_16 = p_16.append_one(0);
|
||||||
let handle = unsafe {
|
let handle = unsafe {
|
||||||
libc::CreateFileW(p_16.as_ptr(),
|
libc::CreateFileW(p_16.as_ptr(),
|
||||||
libc::FILE_GENERIC_READ |
|
libc::FILE_GENERIC_READ |
|
||||||
|
@ -281,19 +281,22 @@ pub mod dl {
|
|||||||
#[cfg(target_os = "win32")]
|
#[cfg(target_os = "win32")]
|
||||||
pub mod dl {
|
pub mod dl {
|
||||||
use c_str::ToCStr;
|
use c_str::ToCStr;
|
||||||
|
use iter::Iterator;
|
||||||
use libc;
|
use libc;
|
||||||
use os;
|
use os;
|
||||||
use ptr;
|
use ptr;
|
||||||
use result::{Ok, Err, Result};
|
use result::{Ok, Err, Result};
|
||||||
use str::StrAllocating;
|
use str::StrSlice;
|
||||||
use str;
|
use str;
|
||||||
use string::String;
|
use string::String;
|
||||||
|
use vec::Vec;
|
||||||
|
|
||||||
pub unsafe fn open_external<T: ToCStr>(filename: T) -> *mut u8 {
|
pub unsafe fn open_external<T: ToCStr>(filename: T) -> *mut u8 {
|
||||||
// Windows expects Unicode data
|
// Windows expects Unicode data
|
||||||
let filename_cstr = filename.to_c_str();
|
let filename_cstr = filename.to_c_str();
|
||||||
let filename_str = str::from_utf8(filename_cstr.as_bytes_no_nul()).unwrap();
|
let filename_str = str::from_utf8(filename_cstr.as_bytes_no_nul()).unwrap();
|
||||||
let filename_str = filename_str.to_utf16().append_one(0);
|
let filename_str: Vec<u16> = filename_str.utf16_units().collect();
|
||||||
|
let filename_str = filename_str.append_one(0);
|
||||||
LoadLibraryW(filename_str.as_ptr() as *const libc::c_void) as *mut u8
|
LoadLibraryW(filename_str.as_ptr() as *const libc::c_void) as *mut u8
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -365,7 +365,8 @@ pub fn getenv(n: &str) -> Option<String> {
|
|||||||
unsafe {
|
unsafe {
|
||||||
with_env_lock(|| {
|
with_env_lock(|| {
|
||||||
use os::win32::{fill_utf16_buf_and_decode};
|
use os::win32::{fill_utf16_buf_and_decode};
|
||||||
let n = n.to_utf16().append_one(0);
|
let n: Vec<u16> = n.utf16_units().collect();
|
||||||
|
let n = n.append_one(0);
|
||||||
fill_utf16_buf_and_decode(|buf, sz| {
|
fill_utf16_buf_and_decode(|buf, sz| {
|
||||||
libc::GetEnvironmentVariableW(n.as_ptr(), buf, sz)
|
libc::GetEnvironmentVariableW(n.as_ptr(), buf, sz)
|
||||||
})
|
})
|
||||||
@ -411,8 +412,10 @@ pub fn setenv(n: &str, v: &str) {
|
|||||||
|
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
fn _setenv(n: &str, v: &str) {
|
fn _setenv(n: &str, v: &str) {
|
||||||
let n = n.to_utf16().append_one(0);
|
let n: Vec<u16> = n.utf16_units().collect();
|
||||||
let v = v.to_utf16().append_one(0);
|
let n = n.append_one(0);
|
||||||
|
let v: Vec<u16> = v.utf16_units().collect();
|
||||||
|
let v = v.append_one(0);
|
||||||
unsafe {
|
unsafe {
|
||||||
with_env_lock(|| {
|
with_env_lock(|| {
|
||||||
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
|
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
|
||||||
@ -437,7 +440,8 @@ pub fn unsetenv(n: &str) {
|
|||||||
|
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
fn _unsetenv(n: &str) {
|
fn _unsetenv(n: &str) {
|
||||||
let n = n.to_utf16().append_one(0);
|
let n: Vec<u16> = n.utf16_units().collect();
|
||||||
|
let n = n.append_one(0);
|
||||||
unsafe {
|
unsafe {
|
||||||
with_env_lock(|| {
|
with_env_lock(|| {
|
||||||
libc::SetEnvironmentVariableW(n.as_ptr(), ptr::null());
|
libc::SetEnvironmentVariableW(n.as_ptr(), ptr::null());
|
||||||
@ -804,7 +808,7 @@ pub fn change_dir(p: &Path) -> bool {
|
|||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
fn chdir(p: &Path) -> bool {
|
fn chdir(p: &Path) -> bool {
|
||||||
let p = match p.as_str() {
|
let p = match p.as_str() {
|
||||||
Some(s) => s.to_utf16().append_one(0),
|
Some(s) => s.utf16_units().collect::<Vec<u16>>().append_one(0),
|
||||||
None => return false,
|
None => return false,
|
||||||
};
|
};
|
||||||
unsafe {
|
unsafe {
|
||||||
|
Loading…
Reference in New Issue
Block a user