Auto merge of #46798 - Diggsey:debug-osstr, r=dtolnay

Add lossless debug implementation for unix OsStrs

Fixes #22766

Invalid UTF-8 byte sequences are replaced with `\xFF` style escape codes, while valid UTF-8 goes through the normal `Debug` implementation.

This is necessarily different from the Windows `Debug` implementation, which uses `\u{xxxx}` style escape sequences for unpaired surrogates, but the two implementations are consistent: both are lossless, and both display invalid sequences in the way most similar to existing language syntax.

r? @dtolnay
This commit is contained in:
bors 2017-12-18 02:54:11 +00:00
commit a3a7203e2c
5 changed files with 63 additions and 3 deletions

View File

@ -18,6 +18,7 @@ use mem;
use rc::Rc;
use sync::Arc;
use sys_common::{AsInner, IntoInner};
use sys_common::bytestring::debug_fmt_bytestring;
use std_unicode::lossy::Utf8Lossy;
#[derive(Clone, Hash)]
@ -31,7 +32,7 @@ pub struct Slice {
impl fmt::Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
debug_fmt_bytestring(&self.inner, formatter)
}
}

View File

@ -18,6 +18,7 @@ use mem;
use rc::Rc;
use sync::Arc;
use sys_common::{AsInner, IntoInner};
use sys_common::bytestring::debug_fmt_bytestring;
use std_unicode::lossy::Utf8Lossy;
#[derive(Clone, Hash)]
@ -31,7 +32,7 @@ pub struct Slice {
impl fmt::Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
debug_fmt_bytestring(&self.inner, formatter)
}
}

View File

@ -18,6 +18,7 @@ use mem;
use rc::Rc;
use sync::Arc;
use sys_common::{AsInner, IntoInner};
use sys_common::bytestring::debug_fmt_bytestring;
use std_unicode::lossy::Utf8Lossy;
#[derive(Clone, Hash)]
@ -31,7 +32,7 @@ pub struct Slice {
impl fmt::Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
debug_fmt_bytestring(&self.inner, formatter)
}
}

View File

@ -0,0 +1,56 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![allow(dead_code)]
use fmt::{Formatter, Result, Write};
use std_unicode::lossy::{Utf8Lossy, Utf8LossyChunk};
/// Writes `slice` to `f` as a double-quoted, escaped string.
///
/// The bytes are split into chunks by `Utf8Lossy`. Each chunk's valid UTF-8
/// text is written one `char` at a time through `char::escape_debug`, and
/// each byte of the chunk's broken (non-UTF-8) part is written as a `\xNN`
/// escape with two uppercase hexadecimal digits.
///
/// Any error reported by the formatter is propagated to the caller.
pub fn debug_fmt_bytestring(slice: &[u8], f: &mut Formatter) -> Result {
    f.write_str("\"")?;
    for Utf8LossyChunk { valid, broken } in Utf8Lossy::from_bytes(slice).chunks() {
        // Well-formed text: emit every char using its debug escape sequence.
        for ch in valid.chars() {
            for escaped in ch.escape_debug() {
                f.write_char(escaped)?;
            }
        }
        // Malformed bytes: spell each one out so that no information is lost.
        for byte in broken {
            write!(f, "\\x{:02X}", byte)?;
        }
    }
    f.write_str("\"")
}
#[cfg(test)]
mod tests {
    use super::*;
    use fmt::{Formatter, Result, Debug};

    /// Formats a byte string that starts with an invalid UTF-8 byte and
    /// contains a tab, and checks the exact escaped output.
    #[test]
    fn smoke() {
        // Adapter that lets a raw byte slice be formatted with `{:?}` by
        // forwarding to `debug_fmt_bytestring`.
        struct Helper<'a>(&'a [u8]);
        impl<'a> Debug for Helper<'a> {
            fn fmt(&self, f: &mut Formatter) -> Result {
                debug_fmt_bytestring(self.0, f)
            }
        }

        let input = b"\xF0hello,\tworld";
        // Raw string: the backslashes below are literal characters that the
        // formatter is expected to emit, not escapes in this source file.
        let expected = r#""\xF0hello,\tworld""#;
        let output = format!("{:?}", Helper(input));
        // `assert_eq!` reports both values on failure, unlike `assert!(a == b)`.
        assert_eq!(output, expected);
    }
}

View File

@ -43,6 +43,7 @@ pub mod thread_info;
pub mod thread_local;
pub mod util;
pub mod wtf8;
pub mod bytestring;
cfg_if! {
if #[cfg(any(target_os = "redox", target_os = "l4re"))] {