rustdoc: Add syntax highlighting

This adds simple syntax highlighting based off libsyntax's lexer to be sure to
stay up to date with rust's grammar. Some of the highlighting is a bit ad-hoc,
but it definitely seems to get the job done!

This currently doesn't highlight rustdoc-rendered function signatures and
structs that are emitted to each page because the colors already signify what's
clickable and I think we'd have to figure out a different scheme before
colorizing them. This does, however, colorize all code examples and source code.

Closes #11393
This commit is contained in:
Alex Crichton 2014-02-20 01:14:51 -08:00
parent 87e3b5fe7f
commit ad9e26dab3
8 changed files with 239 additions and 26 deletions

View File

@ -0,0 +1,174 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Basic html highlighting functionality
//!
//! This module uses libsyntax's lexer to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
use std::str;
use std::io;
use syntax::parse;
use syntax::parse::lexer;
use syntax::diagnostic;
use syntax::codemap::{BytePos, Span};
use html::escape::Escape;
use t = syntax::parse::token;
/// Converts a snippet of Rust source into syntax-highlighted HTML.
///
/// The text in `src` is registered with a fresh parse session as a file map
/// named `<stdin>`, tokenized with libsyntax's string reader, and rendered by
/// `doit` into an in-memory buffer. The buffer's bytes are then converted
/// (lossily with respect to UTF-8) into the owned string that is returned.
pub fn highlight(src: &str) -> ~str {
    let parse_sess = parse::new_parse_sess();
    let diag = diagnostic::mk_span_handler(diagnostic::mk_handler(),
                                           parse_sess.cm);
    let filemap = parse::string_to_filemap(parse_sess, src.to_owned(),
                                           ~"<stdin>");
    let reader = lexer::new_string_reader(diag, filemap);

    // Any write error reported by `doit` aborts here via `unwrap`.
    let mut html = io::MemWriter::new();
    doit(parse_sess, reader, &mut html).unwrap();
    str::from_utf8_lossy(html.unwrap()).into_owned()
}
/// Exhausts the `lexer` writing the output into `out`.
///
/// The general structure for this method is to iterate over each token,
/// possibly giving it an HTML span with a class specifying what flavor of token
/// it's used. All source code emission is done as slices from the source map,
/// not from the tokens themselves, in order to stay true to the original
/// source.
fn doit(sess: @parse::ParseSess, lexer: lexer::StringReader,
out: &mut Writer) -> io::IoResult<()> {
use syntax::parse::lexer::Reader;
try!(write!(out, "<pre class='rust'>\n"));
let mut last = BytePos(0);
let mut is_attribute = false;
let mut is_macro = false;
loop {
let next = lexer.next_token();
let test = if next.tok == t::EOF {lexer.pos.get()} else {next.sp.lo};
// The lexer consumes all whitespace and non-doc-comments when iterating
// between tokens. If this token isn't directly adjacent to our last
// token, then we need to emit the whitespace/comment.
//
// If the gap has any '/' characters then we consider the whole thing a
// comment. This will classify some whitespace as a comment, but that
// doesn't matter too much for syntax highlighting purposes.
if test > last {
let snip = sess.cm.span_to_snippet(Span {
lo: last,
hi: test,
expn_info: None,
}).unwrap();
if snip.contains("/") {
try!(write!(out, "<span class='comment'>{}</span>",
Escape(snip)));
} else {
try!(write!(out, "{}", Escape(snip)));
}
}
last = next.sp.hi;
if next.tok == t::EOF { break }
let klass = match next.tok {
// If this '&' token is directly adjacent to another token, assume
// that it's the address-of operator instead of the and-operator.
// This allows us to give all pointers their own class (~ and @ are
// below).
t::BINOP(t::AND) if lexer.peek().sp.lo == next.sp.hi => "kw-2",
t::AT | t::TILDE => "kw-2",
// consider this as part of a macro invocation if there was a
// leading identifier
t::NOT if is_macro => { is_macro = false; "macro" }
// operators
t::EQ | t::LT | t::LE | t::EQEQ | t::NE | t::GE | t::GT |
t::ANDAND | t::OROR | t::NOT | t::BINOP(..) | t::RARROW |
t::BINOPEQ(..) | t::FAT_ARROW => "op",
// miscellaneous, no highlighting
t::DOT | t::DOTDOT | t::DOTDOTDOT | t::COMMA | t::SEMI |
t::COLON | t::MOD_SEP | t::LARROW | t::DARROW | t::LPAREN |
t::RPAREN | t::LBRACKET | t::LBRACE | t::RBRACE |
t::DOLLAR => "",
// This is the start of an attribute. We're going to want to
// continue highlighting it as an attribute until the ending ']' is
// seen, so skip out early. Down below we terminate the attribute
// span when we see the ']'.
t::POUND => {
is_attribute = true;
try!(write!(out, r"<span class='attribute'>\#"));
continue
}
t::RBRACKET => {
if is_attribute {
is_attribute = false;
try!(write!(out, "]</span>"));
continue
} else {
""
}
}
// text literals
t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
// number literals
t::LIT_INT(..) | t::LIT_UINT(..) | t::LIT_INT_UNSUFFIXED(..) |
t::LIT_FLOAT(..) | t::LIT_FLOAT_UNSUFFIXED(..) => "number",
// keywords are also included in the identifier set
t::IDENT(ident, _is_mod_sep) => {
match t::get_ident(ident).get() {
"ref" | "mut" => "kw-2",
"self" => "self",
"false" | "true" => "boolval",
"Option" | "Result" => "prelude-ty",
"Some" | "None" | "Ok" | "Err" => "prelude-val",
_ if t::is_any_keyword(&next.tok) => "kw",
_ => {
if lexer.peek().tok == t::NOT {
is_macro = true;
"macro"
} else {
"ident"
}
}
}
}
t::LIFETIME(..) => "lifetime",
t::DOC_COMMENT(..) => "doccomment",
t::UNDERSCORE | t::EOF | t::INTERPOLATED(..) => "",
};
// as mentioned above, use the original source code instead of
// stringifying this token
let snip = sess.cm.span_to_snippet(next.sp).unwrap();
if klass == "" {
try!(write!(out, "{}", Escape(snip)));
} else {
try!(write!(out, "<span class='{}'>{}</span>", klass,
Escape(snip)));
}
}
write!(out, "</pre>\n")
}

View File

@ -35,6 +35,8 @@ use std::str;
use std::unstable::intrinsics; use std::unstable::intrinsics;
use std::vec; use std::vec;
use html::highlight;
/// A unit struct which has the `fmt::Show` trait implemented. When /// A unit struct which has the `fmt::Show` trait implemented. When
/// formatted, this struct will emit the HTML corresponding to the rendered /// formatted, this struct will emit the HTML corresponding to the rendered
/// version of the contained markdown string. /// version of the contained markdown string.
@ -95,6 +97,7 @@ extern {
fn sd_markdown_free(md: *sd_markdown); fn sd_markdown_free(md: *sd_markdown);
fn bufnew(unit: libc::size_t) -> *buf; fn bufnew(unit: libc::size_t) -> *buf;
fn bufputs(b: *buf, c: *libc::c_char);
fn bufrelease(b: *buf); fn bufrelease(b: *buf);
} }
@ -127,7 +130,27 @@ pub fn render(w: &mut io::Writer, s: &str) -> fmt::Result {
asize: text.len() as libc::size_t, asize: text.len() as libc::size_t,
unit: 0, unit: 0,
}; };
(my_opaque.dfltblk)(ob, &buf, lang, opaque); let rendered = if lang.is_null() {
false
} else {
vec::raw::buf_as_slice((*lang).data,
(*lang).size as uint, |rlang| {
let rlang = str::from_utf8(rlang).unwrap();
if rlang.contains("notrust") {
(my_opaque.dfltblk)(ob, &buf, lang, opaque);
true
} else {
false
}
})
};
if !rendered {
let output = highlight::highlight(text).to_c_str();
output.with_ref(|r| {
bufputs(ob, r)
})
}
}) })
} }
} }
@ -181,7 +204,8 @@ pub fn find_testable_code(doc: &str, tests: &mut ::test::Collector) {
vec::raw::buf_as_slice((*lang).data, vec::raw::buf_as_slice((*lang).data,
(*lang).size as uint, |lang| { (*lang).size as uint, |lang| {
let s = str::from_utf8(lang).unwrap(); let s = str::from_utf8(lang).unwrap();
(s.contains("should_fail"), s.contains("ignore")) (s.contains("should_fail"), s.contains("ignore") ||
s.contains("notrust"))
}) })
}; };
if ignore { return } if ignore { return }

View File

@ -50,10 +50,10 @@ use syntax::parse::token::InternedString;
use clean; use clean;
use doctree; use doctree;
use fold::DocFolder; use fold::DocFolder;
use html::escape::Escape;
use html::format::{VisSpace, Method, PuritySpace}; use html::format::{VisSpace, Method, PuritySpace};
use html::layout; use html::layout;
use html::markdown::Markdown; use html::markdown::Markdown;
use html::highlight;
/// Major driving force in all rustdoc rendering. This contains information /// Major driving force in all rustdoc rendering. This contains information
/// about where in the tree-like hierarchy rendering is occurring and controls /// about where in the tree-like hierarchy rendering is occurring and controls
@ -1091,7 +1091,8 @@ fn item_module(w: &mut Writer, cx: &Context,
fn item_function(w: &mut Writer, it: &clean::Item, fn item_function(w: &mut Writer, it: &clean::Item,
f: &clean::Function) -> fmt::Result { f: &clean::Function) -> fmt::Result {
try!(write!(w, "<pre class='fn'>{vis}{purity}fn {name}{generics}{decl}</pre>", try!(write!(w, "<pre class='rust fn'>{vis}{purity}fn \
{name}{generics}{decl}</pre>",
vis = VisSpace(it.visibility), vis = VisSpace(it.visibility),
purity = PuritySpace(f.purity), purity = PuritySpace(f.purity),
name = it.name.get_ref().as_slice(), name = it.name.get_ref().as_slice(),
@ -1112,7 +1113,7 @@ fn item_trait(w: &mut Writer, it: &clean::Item,
} }
// Output the trait definition // Output the trait definition
try!(write!(w, "<pre class='trait'>{}trait {}{}{} ", try!(write!(w, "<pre class='rust trait'>{}trait {}{}{} ",
VisSpace(it.visibility), VisSpace(it.visibility),
it.name.get_ref().as_slice(), it.name.get_ref().as_slice(),
t.generics, t.generics,
@ -1231,7 +1232,7 @@ fn render_method(w: &mut Writer, meth: &clean::Item) -> fmt::Result {
fn item_struct(w: &mut Writer, it: &clean::Item, fn item_struct(w: &mut Writer, it: &clean::Item,
s: &clean::Struct) -> fmt::Result { s: &clean::Struct) -> fmt::Result {
try!(write!(w, "<pre class='struct'>")); try!(write!(w, "<pre class='rust struct'>"));
try!(render_struct(w, it, Some(&s.generics), s.struct_type, s.fields, try!(render_struct(w, it, Some(&s.generics), s.struct_type, s.fields,
s.fields_stripped, "", true)); s.fields_stripped, "", true));
try!(write!(w, "</pre>")); try!(write!(w, "</pre>"));
@ -1255,7 +1256,7 @@ fn item_struct(w: &mut Writer, it: &clean::Item,
} }
fn item_enum(w: &mut Writer, it: &clean::Item, e: &clean::Enum) -> fmt::Result { fn item_enum(w: &mut Writer, it: &clean::Item, e: &clean::Enum) -> fmt::Result {
try!(write!(w, "<pre class='enum'>{}enum {}{}", try!(write!(w, "<pre class='rust enum'>{}enum {}{}",
VisSpace(it.visibility), VisSpace(it.visibility),
it.name.get_ref().as_slice(), it.name.get_ref().as_slice(),
e.generics)); e.generics));
@ -1532,7 +1533,7 @@ fn render_impl(w: &mut Writer, i: &clean::Impl,
fn item_typedef(w: &mut Writer, it: &clean::Item, fn item_typedef(w: &mut Writer, it: &clean::Item,
t: &clean::Typedef) -> fmt::Result { t: &clean::Typedef) -> fmt::Result {
try!(write!(w, "<pre class='typedef'>type {}{} = {};</pre>", try!(write!(w, "<pre class='rust typedef'>type {}{} = {};</pre>",
it.name.get_ref().as_slice(), it.name.get_ref().as_slice(),
t.generics, t.generics,
t.type_)); t.type_));
@ -1625,9 +1626,7 @@ impl<'a> fmt::Show for Source<'a> {
try!(write!(fmt.buf, "<span id='{0:u}'>{0:1$u}</span>\n", i, cols)); try!(write!(fmt.buf, "<span id='{0:u}'>{0:1$u}</span>\n", i, cols));
} }
try!(write!(fmt.buf, "</pre>")); try!(write!(fmt.buf, "</pre>"));
try!(write!(fmt.buf, "<pre class='rust'>")); try!(write!(fmt.buf, "{}", highlight::highlight(s.as_slice())));
try!(write!(fmt.buf, "{}", Escape(s.as_slice())));
try!(write!(fmt.buf, "</pre>"));
Ok(()) Ok(())
} }
} }

View File

@ -303,3 +303,18 @@ a {
.stability.Locked { border-color: #0084B6; color: #00668c; } .stability.Locked { border-color: #0084B6; color: #00668c; }
:target { background: #FDFFD3; } :target { background: #FDFFD3; }
pre.rust .kw { color: #cc782f; }
pre.rust .kw-2 { color: #3bbb33; }
pre.rust .prelude-ty { color: #3bbb33; }
pre.rust .number { color: #c13928; }
pre.rust .self { color: #c13928; }
pre.rust .boolval { color: #c13928; }
pre.rust .prelude-val { color: #c13928; }
pre.rust .op { color: #cc782f; }
pre.rust .comment { color: #533add; }
pre.rust .doccomment { color: #d343d0; }
pre.rust .macro { color: #d343d0; }
pre.rust .string { color: #c13928; }
pre.rust .lifetime { color: #d343d0; }
pre.rust .attribute { color: #d343d0 !important; }

View File

@ -38,6 +38,7 @@ pub mod core;
pub mod doctree; pub mod doctree;
pub mod fold; pub mod fold;
pub mod html { pub mod html {
pub mod highlight;
pub mod escape; pub mod escape;
pub mod format; pub mod format;
pub mod layout; pub mod layout;

View File

@ -82,7 +82,7 @@ function, but the `format!` macro is a syntax extension which allows it to
leverage named parameters. Named parameters are listed at the end of the leverage named parameters. Named parameters are listed at the end of the
argument list and have the syntax: argument list and have the syntax:
```ignore ```notrust
identifier '=' expression identifier '=' expression
``` ```
@ -107,7 +107,7 @@ and if all references to one argument do not provide a type, then the format `?`
is used (the type's rust-representation is printed). For example, this is an is used (the type's rust-representation is printed). For example, this is an
invalid format string: invalid format string:
```ignore ```notrust
{0:d} {0:s} {0:d} {0:s}
``` ```
@ -123,7 +123,7 @@ must have the type `uint`. Although a `uint` can be printed with `{:u}`, it is
illegal to reference an argument as such. For example, this is another invalid illegal to reference an argument as such. For example, this is another invalid
format string: format string:
```ignore ```notrust
{:.*s} {0:u} {:.*s} {0:u}
``` ```
@ -232,7 +232,7 @@ fn main() {
There are a number of related macros in the `format!` family. The ones that are There are a number of related macros in the `format!` family. The ones that are
currently implemented are: currently implemented are:
```rust,ignore ```ignore
format! // described above format! // described above
write! // first argument is a &mut io::Writer, the destination write! // first argument is a &mut io::Writer, the destination
writeln! // same as write but appends a newline writeln! // same as write but appends a newline
@ -276,7 +276,7 @@ references information on the stack. Under the hood, all of
the related macros are implemented in terms of this. First the related macros are implemented in terms of this. First
off, some example usage is: off, some example usage is:
```rust,ignore ```ignore
use std::fmt; use std::fmt;
# fn lol<T>() -> T { fail!() } # fn lol<T>() -> T { fail!() }
@ -334,7 +334,7 @@ This example is the equivalent of `{0:s}` essentially.
The select method is a switch over a `&str` parameter, and the parameter *must* The select method is a switch over a `&str` parameter, and the parameter *must*
be of the type `&str`. An example of the syntax is: be of the type `&str`. An example of the syntax is:
```ignore ```notrust
{0, select, male{...} female{...} other{...}} {0, select, male{...} female{...} other{...}}
``` ```
@ -353,7 +353,7 @@ The plural method is a switch statement over a `uint` parameter, and the
parameter *must* be a `uint`. A plural method in its full glory can be specified parameter *must* be a `uint`. A plural method in its full glory can be specified
as: as:
```ignore ```notrust
{0, plural, offset=1 =1{...} two{...} many{...} other{...}} {0, plural, offset=1 =1{...} two{...} many{...} other{...}}
``` ```
@ -381,7 +381,7 @@ should not be too alien. Arguments are formatted with python-like syntax,
meaning that arguments are surrounded by `{}` instead of the C-like `%`. The meaning that arguments are surrounded by `{}` instead of the C-like `%`. The
actual grammar for the formatting syntax is: actual grammar for the formatting syntax is:
```ignore ```notrust
format_string := <text> [ format <text> ] * format_string := <text> [ format <text> ] *
format := '{' [ argument ] [ ':' format_spec ] [ ',' function_spec ] '}' format := '{' [ argument ] [ ':' format_spec ] [ ',' function_spec ] '}'
argument := integer | identifier argument := integer | identifier
@ -896,10 +896,10 @@ impl<'a> Formatter<'a> {
/// ///
/// # Arguments /// # Arguments
/// ///
/// * is_positive - whether the original integer was positive or not. /// * is_positive - whether the original integer was positive or not.
/// * prefix - if the '#' character (FlagAlternate) is provided, this /// * prefix - if the '#' character (FlagAlternate) is provided, this
/// is the prefix to put in front of the number. /// is the prefix to put in front of the number.
/// * buf - the byte array that the number has been formatted into /// * buf - the byte array that the number has been formatted into
/// ///
/// This function will correctly account for the flags provided as well as /// This function will correctly account for the flags provided as well as
/// the minimum width. It will not take precision into account. /// the minimum width. It will not take precision into account.

View File

@ -53,7 +53,7 @@ pub trait Zero: Add<Self, Self> {
/// ///
/// # Laws /// # Laws
/// ///
/// ~~~ignore /// ~~~notrust
/// a + 0 = a ∀ a ∈ Self /// a + 0 = a ∀ a ∈ Self
/// 0 + a = a ∀ a ∈ Self /// 0 + a = a ∀ a ∈ Self
/// ~~~ /// ~~~
@ -79,7 +79,7 @@ pub trait One: Mul<Self, Self> {
/// ///
/// # Laws /// # Laws
/// ///
/// ~~~ignore /// ~~~notrust
/// a * 1 = a ∀ a ∈ Self /// a * 1 = a ∀ a ∈ Self
/// 1 * a = a ∀ a ∈ Self /// 1 * a = a ∀ a ∈ Self
/// ~~~ /// ~~~

View File

@ -3,6 +3,6 @@
file="$1/doc/foo/fn.foo.html" file="$1/doc/foo/fn.foo.html"
grep -v 'invisible' $file && grep -v 'invisible' $file &&
grep '#\[deriving(Eq)\] // Bar' $file grep '#.*\[.*deriving.*(.*Eq.*).*\].*//.*Bar' $file
exit $? exit $?