gcc/libjava/classpath/gnu/xml/pipeline/PipelineFactory.java
Tom Tromey f911ba985a Initial revision
From-SVN: r102074
2005-07-16 00:30:23 +00:00

724 lines
22 KiB
Java

/* PipelineFactory.java --
Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.xml.pipeline;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.lang.reflect.Constructor;
import java.util.StringTokenizer;
import org.xml.sax.*;
import org.xml.sax.ext.*;
/**
* This provides static factory methods for creating simple event pipelines.
* These pipelines are specified by strings, suitable for passing on
* command lines or embedding in element attributes. For example, one way
* to write a pipeline that restores namespace syntax, validates (stopping
* the pipeline on validity errors) and then writes valid data to standard
* output is this: <pre>
* nsfix | validate | write ( stdout )</pre>
*
* <p> In this syntax, the tokens are always separated by whitespace, and each
* stage of the pipeline may optionally have a parameter (which can be a
* pipeline) in parentheses. Interior stages are called filters, and the
* rightmost end of a pipeline is called a terminus.
*
* <p> Stages are usually implemented by a single class, which may not be
* able to act as both a filter and a terminus; but any terminus can be
* automatically turned into a filter, through use of a {@link TeeConsumer}.
* The stage identifiers are either class names, or are one of the following
* short identifiers built into this class. (Most of these identifiers are
* no more than aliases for classes.) The built-in identifiers include:</p>
<table border="1" cellpadding="3" cellspacing="0">
<tr bgcolor="#ccccff" class="TableHeadingColor">
<th align="center" width="5%">Stage</th>
<th align="center" width="9%">Parameter</th>
<th align="center" width="1%">Terminus</th>
<th align="center">Description</th>
</tr>
<tr valign="top" align="center">
<td><a href="../dom/Consumer.html">dom</a></td>
<td><em>none</em></td>
<td> yes </td>
<td align="left"> Application code can access a DOM Document built
from the input event stream. When used as a filter, this buffers
data up to an <em>endDocument</em> call, and then uses a DOM parser
to report everything that has been recorded (which can easily be
less than what was reported to it). </td>
</tr>
<tr valign="top" align="center">
<td><a href="NSFilter.html">nsfix</a></td>
<td><em>none</em></td>
<td>no</td>
<td align="left">This stage ensures that the XML element and attribute
names in its output use namespace prefixes and declarations correctly.
That is, so that they match the "Namespace plus LocalName" naming data
with which each XML element and attribute is already associated. </td>
</tr>
<tr valign="top" align="center">
<td><a href="EventFilter.html">null</a></td>
<td><em>none</em></td>
<td>yes</td>
<td align="left">This stage ignores all input event data.</td>
</tr>
<tr valign="top" align="center">
<td><a href="CallFilter.html">server</a></td>
<td><em>required</em><br> server URL </td>
<td>no</td>
<td align="left">Sends its input as XML request to a remote server,
normally a web application server using the HTTP or HTTPS protocols.
The output of this stage is the parsed response from that server.</td>
</tr>
<tr valign="top" align="center">
<td><a href="TeeConsumer.html">tee</a></td>
<td><em>required</em><br> first pipeline</td>
<td>no</td>
<td align="left">This sends its events down two paths; its parameter
is a pipeline descriptor for the first path, and the second path
is the output of this stage.</td>
</tr>
<tr valign="top" align="center">
<td><a href="ValidationConsumer.html">validate</a></td>
<td><em>none</em></td>
<td>yes</td>
<td align="left">This checks for validity errors, and reports them
through its error handler. The input must include declaration events
and some lexical events. </td>
</tr>
<tr valign="top" align="center">
<td><a href="WellFormednessFilter.html">wf</a></td>
<td><em>none</em></td>
<td>yes</td>
<td align="left"> This class provides some basic "well formedness"
tests on the input event stream, and reports a fatal error if any
of them fail. One example: start/end calls for elements must match.
No SAX parser is permitted to produce malformed output, but other
components can easily do so.</td>
</tr>
<tr valign="top" align="center">
<td>write</td>
<td><em>required</em><br> "stdout", "stderr", or filename</td>
<td>yes</td>
<td align="left"> Writes its input to the specified output, as pretty
printed XML text encoded using UTF-8. Input events must be well
formed and "namespace fixed", else the output won't be XML (or possibly
namespace) conformant. The symbolic names represent
<em>System.out</em> and <em>System.err</em> respectively; any other
name is treated as a filename, which must not refer to an existing file.</td>
</tr>
<tr valign="top" align="center">
<td>xhtml</td>
<td><em>required</em><br> "stdout", "stderr", or filename</td>
<td>yes</td>
<td align="left"> Like <em>write</em> (above), except that XHTML rules
are followed. The XHTML 1.0 Transitional document type is declared,
and only ASCII characters are written (for interoperability). Other
characters are written as entity or character references; the text is
pretty printed.</td>
</tr>
<tr valign="top" align="center">
<td><a href="XIncludeFilter.html">xinclude</a></td>
<td><em>none</em></td>
<td>no</td>
<td align="left">This stage handles XInclude processing.
This is like entity inclusion, except that the included content
is declared in-line rather than in the DTD at the beginning of
a document.
</td>
</tr>
<tr valign="top" align="center">
<td><a href="XsltFilter.html">xslt</a></td>
<td><em>required</em><br> XSLT stylesheet URI</td>
<td>no</td>
<td align="left">This stage handles XSLT transformation
according to a stylesheet.
The implementation of the transformation may not actually
stream data, although if such an XSLT engine is in use
then that can happen.
</td>
</tr>
</table>
* <p> Note that {@link EventFilter#bind} can automatically eliminate
* some filters by setting SAX2 parser features appropriately. This means
* that you can routinely put filters like "nsfix", "validate", or "wf" at the
* front of a pipeline (for components that need inputs conditioned to match
* that level of correctness), and know that it won't actually be used unless
* it's absolutely necessary.
*
* @author David Brownell
*/
public class PipelineFactory
{
/**
 * Builds a pipeline from a description string, with no consumer
 * following its last stage.  This is shorthand for calling the
 * two-argument overload with a null successor.
 *
 * @param description whitespace-separated pipeline description
 * @return the consumer produced for the described pipeline
 * @throws IOException if a stage of the pipeline can't be created
 */
public static EventConsumer createPipeline(String description)
    throws IOException
{
    final EventConsumer noSuccessor = null;

    return createPipeline(description, noSuccessor);
}
/**
 * Extends an existing pipeline by prepending the described filter
 * pipeline to the specified consumer.  Some pipelines need more
 * customization than this simplified syntax allows; build those parts
 * with direct API calls, then use this method to put easily configured
 * stages in front of them.
 *
 * @param description whitespace-separated pipeline description
 * @param next consumer that follows the described pipeline; may be null
 * @return the consumer produced for the described pipeline
 * @throws IOException if a stage of the pipeline can't be created
 */
public static EventConsumer createPipeline(
    String description,
    EventConsumer next
) throws IOException
{
    // For now a token is simply whatever whitespace separates; that is
    // trivial to split, at the cost that identifiers can't hold spaces.
    StringTokenizer splitter = new StringTokenizer(description);
    String[] pieces = new String[splitter.countTokens()];
    int filled = 0;

    while (filled < pieces.length) {
        pieces[filled] = splitter.nextToken();
        filled++;
    }

    // The pre-tokenized overload performs the parse and the build.
    return createPipeline(pieces, next);
}
// Private: instances are created only by the static createPipeline
// methods of this class, which use them while parsing.
private PipelineFactory()
{
    /* NYET */
}
/**
 * Extends an existing pipeline by prepending a pre-tokenized filter
 * pipeline to the specified consumer.  Tokens are class names (or the
 * predefined aliases), left and right parentheses, and the vertical bar.
 *
 * @param tokens the pipeline description, one token per element
 * @param next consumer that follows the described pipeline; may be null
 * @return the consumer produced for the described pipeline
 * @throws IOException if a stage of the pipeline can't be created
 */
public static EventConsumer createPipeline(
    String[] tokens,
    EventConsumer next
) throws IOException
{
    Pipeline parsed = new PipelineFactory().parsePipeline(tokens, next);

    return parsed.createPipeline();
}
private String tokens [];
private int index;
private Pipeline parsePipeline (String toks [], EventConsumer next)
{
tokens = toks;
index = 0;
Pipeline retval = parsePipeline (next);
if (index != toks.length)
throw new ArrayIndexOutOfBoundsException (
"extra token: " + tokens [index]);
return retval;
}
// pipeline ::= stage | stage '|' pipeline
//
// Parses one stage, then recurses when a '|' with at least one token
// after it follows.  Otherwise this is the last element of the
// pipeline, and it records the consumer that comes after it.
private Pipeline parsePipeline(EventConsumer next)
{
    Pipeline head = new Pipeline(parseStage());

    // a continuation needs two more tokens: the bar and a stage id
    boolean continues = index <= (tokens.length - 2)
        && "|".equals(tokens[index]);

    if (continues) {
        index++;
        head.rest = parsePipeline(next);
    } else {
        // minimal pipelines: "stage" and "... | id"
        head.next = next;
    }
    return head;
}
// stage ::= id | id '(' pipeline ')'
//
// Consumes one stage starting at the current token: an identifier,
// optionally followed by a parenthesized pipeline used as its parameter.
// Throws ArrayIndexOutOfBoundsException (the exception type used for all
// syntax errors by this parser) when there is no identifier to read or
// when the closing parenthesis is missing or replaced by another token.
private Stage parseStage()
{
    // With no token left (for example an empty or all-whitespace
    // description) the array access below would fail without a useful
    // message; report the problem explicitly instead.
    if (index >= tokens.length)
        throw new ArrayIndexOutOfBoundsException(
            "missing stage identifier");

    Stage retval = new Stage(tokens[index++]);

    // minimal stages: "id" and "id ( id )"
    if (index > (tokens.length - 2)
            || !"(".equals(tokens[index]) /*)*/
            )
        return retval;

    index++;
    // a parameter pipeline is never followed by another consumer
    retval.param = parsePipeline(null);

    if (index >= tokens.length)
        throw new ArrayIndexOutOfBoundsException(
            "missing right paren");
    if (/*(*/ !")".equals(tokens[index++]))
        throw new ArrayIndexOutOfBoundsException(
            "required right paren, not: " + tokens[index - 1]);
    return retval;
}
//
// these classes obey the conventions for constructors, so they're
// only built in to this table of shortnames
//
// - filter (one or two types of arglist)
// * last constructor is 'next' element
// * optional (first) string parameter
//
// - terminus (one or types of arglist)
// * optional (only) string parameter
//
// terminus stages are transformed into filters if needed, by
// creating a "tee". filter stages aren't turned to terminus
// stages though; either eliminate such stages, or add some
// terminus explicitly.
//
private static final String builtinStages [][] = {
{ "dom", "gnu.xml.dom.Consumer" },
{ "nsfix", "gnu.xml.pipeline.NSFilter" },
{ "null", "gnu.xml.pipeline.EventFilter" },
{ "server", "gnu.xml.pipeline.CallFilter" },
{ "tee", "gnu.xml.pipeline.TeeConsumer" },
{ "validate", "gnu.xml.pipeline.ValidationConsumer" },
{ "wf", "gnu.xml.pipeline.WellFormednessFilter" },
{ "xinclude", "gnu.xml.pipeline.XIncludeFilter" },
{ "xslt", "gnu.xml.pipeline.XsltFilter" },
// XXX want: option for validate, to preload external part of a DTD
// xhtml, write ... nyet generic-ready
};
/**
 * One parsed stage of a pipeline: an identifier plus an optional
 * parenthesized parameter.  The identifier is either a built-in short
 * name or a class name; the parameter is kept in parsed form and is
 * handed to the stage as a string.
 */
private static class Stage
{
    // stage identifier, exactly as it appeared in the token stream
    String id;
    // optional parameter; null when the stage had no parentheses
    Pipeline param;

    Stage(String name)
    {
        id = name;
    }

    /** Returns the stage in the same syntax it was parsed from. */
    public String toString()
    {
        if (param == null)
            return id;
        return id + " ( " + param + " )";
    }

    /**
     * Reports a failure to create this stage.  Never returns normally.
     *
     * @throws IOException always, naming this stage in its message
     */
    private void fail(String message)
        throws IOException
    {
        throw new IOException("in '" + id
            + "' stage of pipeline, " + message);
    }

    /**
     * Like {@link #fail(String)}, but keeps the underlying problem as
     * the cause of the exception so that it isn't lost to the caller.
     *
     * @throws IOException always, naming this stage in its message
     */
    private void fail(String message, Throwable cause)
        throws IOException
    {
        IOException failure = new IOException("in '" + id
            + "' stage of pipeline, " + message);

        failure.initCause(cause);
        throw failure;
    }

    // Text describing a throwable: its message if it has one, else its
    // string form (so that reports never end in "null").
    private static String describe(Throwable problem)
    {
        String text = problem.getMessage();

        return (text != null) ? text : problem.toString();
    }

    /**
     * Creates the event consumer for this stage.
     *
     * @param next the consumer that follows this stage, or null when
     *        this stage is the end of its pipeline
     * @return the consumer implementing this stage
     * @throws IOException if the stage can't be created
     */
    EventConsumer createStage(EventConsumer next)
        throws IOException
    {
        String name = id;

        // most builtins are just class aliases
        for (int i = 0; i < builtinStages.length; i++) {
            if (id.equals(builtinStages[i][0])) {
                name = builtinStages[i][1];
                break;
            }
        }

        // Save output as XML or XHTML text
        if ("write".equals(name) || "xhtml".equals(name))
            return createTextStage("xhtml".equals(name), next);

        // everything else is (an alias for) a class name
        return createClassStage(name, next);
    }

    // Builds the "write" and "xhtml" stages, which save their input as
    // text to standard output, standard error, or a newly created file.
    private EventConsumer createTextStage(
        boolean isXhtml,
        EventConsumer next
    ) throws IOException
    {
        if (param == null)
            fail("parameter is required");

        String filename = param.toString();
        OutputStream out;
        // true only when the stream was opened here, so that it must
        // be closed here if the stage can't be completed
        boolean opened = false;

        if ("stdout".equals(filename))
            out = System.out;
        else if ("stderr".equals(filename))
            out = System.err;
        else {
            File f = new File(filename);

            /*
            if (!f.isAbsolute ())
                fail ("require absolute file paths");
            */

            // createNewFile() tests for the file and creates it as one
            // atomic step, so nothing can slip in between an existence
            // test and the creation of the file.
            if (!f.createNewFile())
                fail("file already exists: " + f.getName());
            out = new FileOutputStream(f);
            opened = true;
        }

        TextConsumer consumer = null;
        boolean ready = false;

        try {
            if (!isXhtml)
                consumer = new TextConsumer(out);
            else
                consumer = new TextConsumer(
                    new OutputStreamWriter(out, "8859_1"),
                    true);
            consumer.setPrettyPrinting(true);
            ready = true;
        } finally {
            // don't leak the file stream when setup failed
            if (!ready && opened) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // the original failure is the one worth reporting
                }
            }
        }

        if (next == null)
            return consumer;
        return new TeeConsumer(consumer, next);
    }

    // Builds a stage from a class name.  This covers the builtins that
    // are just aliases for classes, and all stage IDs that started out
    // as class names.  The logic relies on several documented
    // conventions for constructor invocation:
    //
    //  - filter:   (EventConsumer) or (String, EventConsumer)
    //  - terminus: ()              or (String)
    //
    // The string parameter is used exactly when the stage has one.
    private EventConsumer createClassStage(String name, EventConsumer next)
        throws IOException
    {
        // describes the constructor being looked for, for diagnostics
        String msg = null;

        try {
            Class klass = Class.forName(name);
            Class argTypes[] = null;
            Constructor constructor = null;
            // true when a terminus has to be turned into a filter
            boolean filter = false;
            Object params[] = null;
            Object obj = null;

            // do we need a filter stage?
            if (next != null) {
                // "next" consumer is always passed, with
                // or without the optional string param
                if (param == null) {
                    argTypes = new Class[1];
                    argTypes[0] = EventConsumer.class;
                    params = new Object[1];
                    params[0] = next;
                    msg = "no-param filter";
                } else {
                    argTypes = new Class[2];
                    argTypes[0] = String.class;
                    argTypes[1] = EventConsumer.class;
                    params = new Object[2];
                    params[0] = param.toString();
                    params[1] = next;
                    msg = "one-param filter";
                }

                try {
                    constructor = klass.getConstructor(argTypes);
                } catch (NoSuchMethodException e) {
                    // try creating a filter from a
                    // terminus and a tee
                    filter = true;
                    msg += " built from ";
                }
            }

            // build from a terminus stage, with or
            // without the optional string param
            if (constructor == null) {
                String tmp;

                if (param == null) {
                    argTypes = new Class[0];
                    params = new Object[0];
                    tmp = "no-param terminus";
                } else {
                    argTypes = new Class[1];
                    argTypes[0] = String.class;
                    params = new Object[1];
                    params[0] = param.toString();
                    tmp = "one-param terminus";
                }
                if (msg == null)
                    msg = tmp;
                else
                    msg += tmp;
                constructor = klass.getConstructor(argTypes);
                    // NOT creating terminus by dead-ending
                    // filters ... users should think about
                    // that one, something's likely wrong
            }

            obj = constructor.newInstance(params);

            // return EventConsumers directly, perhaps after
            // turning them into a filter
            if (obj instanceof EventConsumer) {
                if (filter)
                    return new TeeConsumer((EventConsumer) obj, next);
                return (EventConsumer) obj;
            }

            // if it's not a handler, it's an error
            // we can wrap handlers in a filter
            EventFilter retval = new EventFilter();
            boolean updated = false;

            if (obj instanceof ContentHandler) {
                retval.setContentHandler((ContentHandler) obj);
                updated = true;
            }
            if (obj instanceof DTDHandler) {
                retval.setDTDHandler((DTDHandler) obj);
                updated = true;
            }
            if (obj instanceof LexicalHandler) {
                retval.setProperty(
                    EventFilter.PROPERTY_URI + "lexical-handler",
                    obj);
                updated = true;
            }
            if (obj instanceof DeclHandler) {
                retval.setProperty(
                    EventFilter.PROPERTY_URI + "declaration-handler",
                    obj);
                updated = true;
            }

            if (!updated)
                fail("class is neither Consumer nor Handler");

            if (filter)
                return new TeeConsumer(retval, next);
            return retval;

        } catch (IOException e) {
            throw e;

        } catch (NoSuchMethodException e) {
            fail(name + " constructor missing -- " + msg);

        } catch (ClassNotFoundException e) {
            fail(name + " class not found");

        } catch (java.lang.reflect.InvocationTargetException e) {
            // The stage's constructor threw.  The reflective wrapper
            // normally has no message of its own, so report the
            // exception it wraps instead.
            Throwable cause = e.getTargetException();

            if (cause == null)
                cause = e;
            fail("stage not available: " + describe(cause), cause);

        } catch (Exception e) {
            fail("stage not available: " + describe(e), e);
        }

        // NOTREACHED
        return null;
    }
}
/**
 * A parsed pipeline, as a linked list: one stage, followed either by
 * the rest of the pipeline or (for the last element) by an optional
 * consumer which the pipeline feeds.
 */
private static class Pipeline
{
    Stage stage;

    // rest may be null
    Pipeline rest;

    // Consumer following the last stage.  The parser only assigns
    // this on the last element of a pipeline; it may be null.
    EventConsumer next;

    // Result of createPipeline(), cached so that repeated calls return
    // the same object.  This is kept separate from "next": using one
    // field for both made a caller-supplied consumer look like an
    // already built pipeline, so the last stage was never created.
    private EventConsumer built;

    Pipeline(Stage s)
    {
        stage = s;
    }

    /**
     * Returns the pipeline in the syntax it was parsed from.
     *
     * @throws IllegalArgumentException if the last element is followed
     *         by a consumer, which has no textual representation
     */
    public String toString()
    {
        if (rest != null)
            return stage + " | " + rest;
        if (next == null)
            return stage.toString();
        throw new IllegalArgumentException("next");
    }

    /**
     * Creates the consumers for this pipeline, back to front, so each
     * stage is handed the consumer that follows it.
     *
     * @return the consumer for the first stage of this pipeline
     * @throws IOException if some stage can't be created
     */
    EventConsumer createPipeline()
        throws IOException
    {
        if (built == null) {
            // what follows this stage: the remaining stages, or else
            // whatever consumer (possibly none) ends the pipeline
            EventConsumer downstream;

            if (rest == null)
                downstream = next;
            else
                downstream = rest.createPipeline();
            built = stage.createStage(downstream);
        }
        return built;
    }
}
/*
public static void main (String argv [])
{
try {
// three basic terminus cases
createPipeline ("null");
createPipeline ("validate");
createPipeline ("write ( stdout )");
// four basic filters
createPipeline ("nsfix | write ( stderr )");
createPipeline ("wf | null");
createPipeline ("null | null");
createPipeline (
"call ( http://www.example.com/services/xml-1a ) | xhtml ( stdout )");
// tee junctions
createPipeline ("tee ( validate ) | write ( stdout )");
createPipeline ("tee ( nsfix | write ( stdout ) ) | validate");
// longer pipeline
createPipeline ("nsfix | tee ( validate ) | write ( stdout )");
createPipeline (
"null | wf | nsfix | tee ( validate ) | write ( stdout )");
// try some parsing error cases
try {
createPipeline ("null ("); // extra token '('
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("nsfix |"); // extra token '|'
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("xhtml ( foo"); // missing right paren
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("xhtml ( foo bar"); // required right paren
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("tee ( nsfix | validate");// missing right paren
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
// try some construction error cases
try {
createPipeline ("call"); // missing param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("call ( foobar )"); // broken param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("nsfix ( foobar )"); // illegal param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("null ( foobar )"); // illegal param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("wf ( foobar )"); // illegal param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("xhtml ( foobar.html )");
new File ("foobar.html").delete ();
// now supported
} catch (Exception e) {
System.err.println ("** err: " + e.getMessage ()); }
try {
createPipeline ("xhtml"); // missing param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("write ( stdout ) | null"); // nonterminal
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("validate | null");
// now supported
} catch (Exception e) {
System.err.println ("** err: " + e.getMessage ()); }
try {
createPipeline ("validate ( foo )"); // illegal param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("tee"); // missing param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
// only builtins so far
createPipeline ("com.example.xml.FilterClass");
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
} catch (Exception e) {
e.printStackTrace ();
}
}
/**/
}