1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
|
/* PipelineFactory.java --
Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.xml.pipeline;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.lang.reflect.Constructor;
import java.util.StringTokenizer;
import org.xml.sax.*;
import org.xml.sax.ext.*;
/**
* This provides static factory methods for creating simple event pipelines.
* These pipelines are specified by strings, suitable for passing on
* command lines or embedding in element attributes. For example, one way
* to write a pipeline that restores namespace syntax, validates (stopping
* the pipeline on validity errors) and then writes valid data to standard
* output is this: <pre>
* nsfix | validate | write ( stdout )</pre>
*
* <p> In this syntax, the tokens are always separated by whitespace, and each
* stage of the pipeline may optionally have a parameter (which can be a
* pipeline) in parentheses. Interior stages are called filters, and the
* rightmost end of a pipeline is called a terminus.
*
* <p> Stages are usually implemented by a single class, which may not be
* able to act as both a filter and a terminus; but any terminus can be
* automatically turned into a filter, through use of a {@link TeeConsumer}.
* The stage identifiers are either class names, or are one of the following
* short identifiers built into this class. (Most of these identifiers are
* no more than aliases for classes.) The built-in identifiers include:</p>
<table border="1" cellpadding="3" cellspacing="0">
<tr bgcolor="#ccccff" class="TableHeadingColor">
<th align="center" width="5%">Stage</th>
<th align="center" width="9%">Parameter</th>
<th align="center" width="1%">Terminus</th>
<th align="center">Description</th>
</tr>
<tr valign="top" align="center">
<td><a href="../dom/Consumer.html">dom</a></td>
<td><em>none</em></td>
<td> yes </td>
<td align="left"> Application code can access a DOM Document built
from the input event stream. When used as a filter, this buffers
data up to an <em>endDocument</em> call, and then uses a DOM parser
to report everything that has been recorded (which can easily be
less than what was reported to it). </td>
</tr>
<tr valign="top" align="center">
<td><a href="NSFilter.html">nsfix</a></td>
<td><em>none</em></td>
<td>no</td>
<td align="left">This stage ensures that the XML element and attribute
names in its output use namespace prefixes and declarations correctly.
That is, so that they match the "Namespace plus LocalName" naming data
with which each XML element and attribute is already associated. </td>
</tr>
<tr valign="top" align="center">
<td><a href="EventFilter.html">null</a></td>
<td><em>none</em></td>
<td>yes</td>
<td align="left">This stage ignores all input event data.</td>
</tr>
<tr valign="top" align="center">
<td><a href="CallFilter.html">server</a></td>
<td><em>required</em><br> server URL </td>
<td>no</td>
<td align="left">Sends its input as an XML request to a remote server,
normally a web application server using the HTTP or HTTPS protocols.
The output of this stage is the parsed response from that server.</td>
</tr>
<tr valign="top" align="center">
<td><a href="TeeConsumer.html">tee</a></td>
<td><em>required</em><br> first pipeline</td>
<td>no</td>
<td align="left">This sends its events down two paths; its parameter
is a pipeline descriptor for the first path, and the second path
is the output of this stage.</td>
</tr>
<tr valign="top" align="center">
<td><a href="ValidationConsumer.html">validate</a></td>
<td><em>none</em></td>
<td>yes</td>
<td align="left">This checks for validity errors, and reports them
through its error handler. The input must include declaration events
and some lexical events. </td>
</tr>
<tr valign="top" align="center">
<td><a href="WellFormednessFilter.html">wf</a></td>
<td><em>none</em></td>
<td>yes</td>
<td align="left"> This class provides some basic "well formedness"
tests on the input event stream, and reports a fatal error if any
of them fail. One example: start/end calls for elements must match.
No SAX parser is permitted to produce malformed output, but other
components can easily do so.</td>
</tr>
<tr valign="top" align="center">
<td>write</td>
<td><em>required</em><br> "stdout", "stderr", or filename</td>
<td>yes</td>
<td align="left"> Writes its input to the specified output, as pretty
printed XML text encoded using UTF-8. Input events must be well
formed and "namespace fixed", else the output won't be XML (or possibly
namespace) conformant. The symbolic names represent
<em>System.out</em> and <em>System.err</em> respectively; names must
correspond to files which don't yet exist.</td>
</tr>
<tr valign="top" align="center">
<td>xhtml</td>
<td><em>required</em><br> "stdout", "stderr", or filename</td>
<td>yes</td>
<td align="left"> Like <em>write</em> (above), except that XHTML rules
are followed. The XHTML 1.0 Transitional document type is declared,
and only ASCII characters are written (for interoperability). Other
characters are written as entity or character references; the text is
pretty printed.</td>
</tr>
<tr valign="top" align="center">
<td><a href="XIncludeFilter.html">xinclude</a></td>
<td><em>none</em></td>
<td>no</td>
<td align="left">This stage handles XInclude processing.
This is like entity inclusion, except that the included content
is declared in-line rather than in the DTD at the beginning of
a document.
</td>
</tr>
<tr valign="top" align="center">
<td><a href="XsltFilter.html">xslt</a></td>
<td><em>required</em><br> XSLT stylesheet URI</td>
<td>no</td>
<td align="left">This stage handles XSLT transformation
according to a stylesheet.
The implementation of the transformation may not actually
stream data, although if such an XSLT engine is in use
then that can happen.
</td>
</tr>
</table>
* <p> Note that {@link EventFilter#bind} can automatically eliminate
* some filters by setting SAX2 parser features appropriately. This means
* that you can routinely put filters like "nsfix", "validate", or "wf" at the
* front of a pipeline (for components that need inputs conditioned to match
* that level of correctness), and know that it won't actually be used unless
* it's absolutely necessary.
*
* @author David Brownell
*/
public class PipelineFactory
{
/**
 * Builds a simple pipeline from the given description string.  This is
 * a convenience form of {@link #createPipeline(String,EventConsumer)}
 * which passes no downstream consumer.
 *
 * @param description whitespace-separated pipeline descriptor, such as
 *  <code>nsfix | validate | write ( stdout )</code>
 * @return whatever the two-argument form returns for a null consumer
 * @exception IOException propagated from the two-argument form
 */
public static EventConsumer createPipeline (String description)
throws IOException
{
    // typed local, so the (String, EventConsumer) overload is explicit
    final EventConsumer noDownstream = null;

    return createPipeline (description, noDownstream);
}
/**
 * Extends an existing pipeline by prepending the filter pipeline to the
 * specified consumer.  Some pipelines need more customization than can
 * be done through this simplified syntax.  When they are set up with
 * direct API calls, use this method to merge more complex pipeline
 * segments with easily configured ones.
 *
 * @param description pipeline descriptor; it is split into tokens at
 *  whitespace, so stage identifiers can never contain spaces
 * @param next the consumer the described pipeline is placed in front
 *  of; may be null
 * @return the consumer produced for the parsed pipeline
 * @exception IOException if a stage of the pipeline can't be created
 */
public static EventConsumer createPipeline (
    String description,
    EventConsumer next
) throws IOException
{
    // tokens are (for now) simply the whitespace-delimited words
    StringTokenizer splitter = new StringTokenizer (description);
    String words [] = new String [splitter.countTokens ()];
    int filled = 0;

    while (filled < words.length) {
        words [filled] = splitter.nextToken ();
        filled++;
    }

    // a fresh factory instance holds the parser state for this call
    return new PipelineFactory ()
        .parsePipeline (words, next)
        .createPipeline ();
}
/**
 * Private: instances are only created by the static factory methods of
 * this class, which use them to hold parser state.
 */
private PipelineFactory ()
{
    // nothing to initialize
}
/**
 * Extends an existing pipeline by prepending a pre-tokenized filter
 * pipeline to the specified consumer.  Tokens are class names (or the
 * predefined aliases), left and right parentheses, and the vertical bar.
 *
 * @param tokens the pipeline descriptor, one token per array element
 * @param next the consumer the described pipeline is placed in front
 *  of; may be null
 * @return the consumer produced for the parsed pipeline
 * @exception IOException if a stage of the pipeline can't be created
 */
public static EventConsumer createPipeline (
    String tokens [],
    EventConsumer next
) throws IOException
{
    PipelineFactory parser = new PipelineFactory ();

    return parser.parsePipeline (tokens, next).createPipeline ();
}
// Parser state, (re)initialized by parsePipeline (String[], EventConsumer):
// the token array being parsed ...
private String tokens [];
// ... and the position in that array of the next token to consume.
private int index;
/**
 * Parser entry point:  resets the parser state to the start of the
 * given tokens, parses one pipeline, and insists that doing so
 * consumed every token.
 *
 * @param toks the complete list of tokens to parse
 * @param next consumer handed on to the recursive parser
 * @return the parsed pipeline
 * @exception ArrayIndexOutOfBoundsException if tokens are left over
 *  once a complete pipeline has been parsed
 */
private Pipeline parsePipeline (String toks [], EventConsumer next)
{
    tokens = toks;
    index = 0;

    Pipeline parsed = parsePipeline (next);

    if (index == toks.length)
        return parsed;
    throw new ArrayIndexOutOfBoundsException (
        "extra token: " + tokens [index]);
}
// pipeline ::= stage | stage '|' pipeline
//
// Parses one stage, then recurses if (and only if) a "|" follows with
// at least one more token after it.  The "next" consumer is recorded
// only on the last element of the resulting chain.
private Pipeline parsePipeline (EventConsumer next)
{
    Pipeline head = new Pipeline (parseStage ());

    // continuing needs the "|" plus at least one token following it
    boolean moreStages = index <= (tokens.length - 2)
        && "|".equals (tokens [index]);

    if (moreStages) {
        index++;
        head.rest = parsePipeline (next);
    } else {
        // minimal pipelines: "stage" and "... | id"
        head.next = next;
    }
    return head;
}
// stage ::= id | id '(' pipeline ')'
//
// Consumes the stage identifier at the current position and, when it
// is followed by a left parenthesis and at least one more token, the
// parenthesized parameter pipeline as well.  All syntax errors are
// reported as ArrayIndexOutOfBoundsException.
private Stage parseStage ()
{
    // an empty token list (such as an empty description string) used
    // to fail on the raw array access below, with just an index as the
    // message; report it explicitly, with the same exception type
    if (index >= tokens.length)
        throw new ArrayIndexOutOfBoundsException (
            "missing stage id");

    Stage retval = new Stage (tokens [index++]);

    // minimal stages: "id" and "id ( id )"
    if (index > (tokens.length - 2)
            || !"(".equals (tokens [index]) /*)*/
            )
        return retval;

    index++;
    // parameter pipelines never have a caller-provided consumer
    retval.param = parsePipeline (null);

    if (index >= tokens.length)
        throw new ArrayIndexOutOfBoundsException (
            "missing right paren");

    if (/*(*/ !")".equals (tokens [index++]))
        throw new ArrayIndexOutOfBoundsException (
            "required right paren, not: " + tokens [index - 1]);

    return retval;
}
//
// Table mapping short stage names to the classes they stand for.
//
// these classes obey the conventions for constructors, so they're
// only built in to this table of shortnames
//
// - filter (one or two types of arglist)
// * last constructor parameter is the 'next' element
// * optional (first) string parameter
//
// - terminus (one or two types of arglist)
// * optional (only) string parameter
//
// terminus stages are transformed into filters if needed, by
// creating a "tee". filter stages aren't turned to terminus
// stages though; either eliminate such stages, or add some
// terminus explicitly.
//
// Each row is { short name, fully qualified class name }; the lookup
// is done in Stage.createStage.  The "write" and "xhtml" stages have
// no row here, since Stage.createStage handles them with special code.
//
private static final String builtinStages [][] = {
{ "dom", "gnu.xml.dom.Consumer" },
{ "nsfix", "gnu.xml.pipeline.NSFilter" },
{ "null", "gnu.xml.pipeline.EventFilter" },
{ "server", "gnu.xml.pipeline.CallFilter" },
{ "tee", "gnu.xml.pipeline.TeeConsumer" },
{ "validate", "gnu.xml.pipeline.ValidationConsumer" },
{ "wf", "gnu.xml.pipeline.WellFormednessFilter" },
{ "xinclude", "gnu.xml.pipeline.XIncludeFilter" },
{ "xslt", "gnu.xml.pipeline.XsltFilter" },
// XXX want: option for validate, to preload external part of a DTD
// xhtml, write ... nyet generic-ready
};
/**
 * One parsed stage of a pipeline:  an identifier (a builtin short name
 * or a class name) plus an optional parameter, which is itself a
 * parsed pipeline.
 */
private static class Stage
{
    // stage identifier, exactly as it appeared in the token list
    String id;
    // optional parameter; null when the stage had no "( ... )" part
    Pipeline param;

    Stage (String name)
        { id = name; }

    /** Returns this stage in descriptor syntax, "id" or "id ( param )". */
    public String toString ()
    {
        if (param == null)
            return id;
        return id + " ( " + param + " )";
    }

    /** Always throws an IOException that names this stage. */
    private void fail (String message)
    throws IOException
    {
        throw new IOException ("in '" + id
            + "' stage of pipeline, " + message);
    }

    /**
     * Always throws an IOException that names this stage, with the
     * given throwable recorded as its cause.
     */
    private void fail (String message, Throwable cause)
    throws IOException
    {
        IOException failure = new IOException ("in '" + id
            + "' stage of pipeline, " + message);

        // initCause rather than a two-argument constructor, so this
        // doesn't depend on newer class libraries
        failure.initCause (cause);
        throw failure;
    }

    /** Describes a throwable even when it carries no message. */
    private static String describe (Throwable t)
    {
        String text = t.getMessage ();

        return (text != null) ? text : t.toString ();
    }

    /**
     * Creates the event consumer for this stage.
     *
     * @param next the consumer this stage sends its output to, or null
     *  if the stage is used as a terminus
     * @return the new consumer; a terminus for which "next" was given
     *  is wrapped in a TeeConsumer so that it acts as a filter
     * @exception IOException if the stage can't be created; the message
     *  names the stage and the reason
     */
    EventConsumer createStage (EventConsumer next)
    throws IOException
    {
        String name = id;

        // most builtins are just class aliases
        for (int i = 0; i < builtinStages.length; i++) {
            if (id.equals (builtinStages [i][0])) {
                name = builtinStages [i][1];
                break;
            }
        }

        // Save output as XML or XHTML text
        if ("write".equals (name) || "xhtml".equals (name))
            return createTextStage ("xhtml".equals (name), next);

        // everything else is resolved as a class name
        return createClassStage (name, next);
    }

    /**
     * Builds a "write" or "xhtml" stage, which pretty-prints its input
     * to System.out, System.err, or a file that must not exist yet.
     */
    private EventConsumer createTextStage (
        boolean isXhtml,
        EventConsumer next
    ) throws IOException
    {
        String filename;
        OutputStream out;
        TextConsumer consumer;

        if (param == null)
            fail ("parameter is required");
        filename = param.toString ();

        if ("stdout".equals (filename))
            out = System.out;
        else if ("stderr".equals (filename))
            out = System.err;
        else {
            File f = new File (filename);

            // createNewFile checks for existence and creates the file
            // in one atomic step, so (unlike an exists() test followed
            // by opening the file) nothing can sneak in between
            if (!f.createNewFile ())
                fail ("file already exists: " + f.getName ());
            out = new FileOutputStream (f);
        }

        if (!isXhtml)
            consumer = new TextConsumer (out);
        else
            consumer = new TextConsumer (
                new OutputStreamWriter (out, "8859_1"),
                true);
        consumer.setPrettyPrinting (true);

        if (next == null)
            return consumer;
        return new TeeConsumer (consumer, next);
    }

    /**
     * Builds a stage by reflection.  This handles all the builtins
     * that are just aliases for classes, and all stage IDs that started
     * out as class names.  The logic relies on several documented
     * conventions for constructor invocation:  filters take the "next"
     * consumer as their last parameter, and both filters and terminus
     * stages may take one (first) string parameter.
     */
    private EventConsumer createClassStage (
        String name,
        EventConsumer next
    ) throws IOException
    {
        // describes the kind of constructor being looked up, for use
        // in the "constructor missing" diagnostic
        String msg = null;

        try {
            Class klass = Class.forName (name);
            Class argTypes [] = null;
            Constructor constructor = null;
            boolean filter = false;
            Object params [] = null;
            Object obj = null;

            // do we need a filter stage?
            if (next != null) {
                // "next" consumer is always passed, with
                // or without the optional string param
                if (param == null) {
                    argTypes = new Class [1];
                    argTypes [0] = EventConsumer.class;

                    params = new Object [1];
                    params [0] = next;

                    msg = "no-param filter";
                } else {
                    argTypes = new Class [2];
                    argTypes [0] = String.class;
                    argTypes [1] = EventConsumer.class;

                    params = new Object [2];
                    params [0] = param.toString ();
                    params [1] = next;

                    msg = "one-param filter";
                }

                try {
                    constructor = klass.getConstructor (argTypes);
                } catch (NoSuchMethodException e) {
                    // try creating a filter from a
                    // terminus and a tee
                    filter = true;
                    msg += " built from ";
                }
            }

            // build from a terminus stage, with or
            // without the optional string param
            if (constructor == null) {
                String tmp;

                if (param == null) {
                    argTypes = new Class [0];
                    params = new Object [0];

                    tmp = "no-param terminus";
                } else {
                    argTypes = new Class [1];
                    argTypes [0] = String.class;

                    params = new Object [1];
                    params [0] = param.toString ();

                    tmp = "one-param terminus";
                }
                if (msg == null)
                    msg = tmp;
                else
                    msg += tmp;
                constructor = klass.getConstructor (argTypes);
                // NOT creating terminus by dead-ending
                // filters ... users should think about
                // that one, something's likely wrong
            }

            obj = constructor.newInstance (params);

            // return EventConsumers directly, perhaps after
            // turning them into a filter
            if (obj instanceof EventConsumer) {
                if (filter)
                    return new TeeConsumer ((EventConsumer) obj, next);
                return (EventConsumer) obj;
            }

            // if it's not a handler, it's an error
            // we can wrap handlers in a filter
            EventFilter retval = new EventFilter ();
            boolean updated = false;

            if (obj instanceof ContentHandler) {
                retval.setContentHandler ((ContentHandler) obj);
                updated = true;
            }
            if (obj instanceof DTDHandler) {
                retval.setDTDHandler ((DTDHandler) obj);
                updated = true;
            }
            if (obj instanceof LexicalHandler) {
                retval.setProperty (
                    EventFilter.PROPERTY_URI + "lexical-handler",
                    obj);
                updated = true;
            }
            if (obj instanceof DeclHandler) {
                retval.setProperty (
                    EventFilter.PROPERTY_URI + "declaration-handler",
                    obj);
                updated = true;
            }

            if (!updated)
                fail ("class is neither Consumer nor Handler");

            if (filter)
                return new TeeConsumer (retval, next);
            return retval;

        } catch (IOException e) {
            // our own diagnostics pass through unchanged
            throw e;

        } catch (NoSuchMethodException e) {
            fail (name + " constructor missing -- " + msg, e);

        } catch (ClassNotFoundException e) {
            fail (name + " class not found", e);

        } catch (java.lang.reflect.InvocationTargetException e) {
            // the constructor itself threw; the wrapper exception has
            // no message of its own, so report what it wraps
            Throwable target = e.getTargetException ();

            if (target == null)
                target = e;
            fail ("stage not available: " + describe (target), target);

        } catch (Exception e) {
            fail ("stage not available: " + describe (e), e);
        }

        // NOTREACHED
        return null;
    }
}
/**
 * A parsed pipeline, represented as a linked chain:  each element
 * holds one stage plus a link to the rest of the pipeline.
 */
private static class Pipeline
{
    // the stage at the head of this (sub)pipeline
    Stage stage;

    // rest may be null
    Pipeline rest;

    // consumer provided by the caller, which receives the output of
    // the last stage; the parser sets it only on the last element of
    // a chain, and it may be null
    EventConsumer next;

    // result of createPipeline, saved so the stages are only built
    // once.  This is kept apart from "next":  when one field served
    // both purposes, a caller-provided consumer looked like an already
    // built stage and the last stage was silently left out.
    private EventConsumer built;

    Pipeline (Stage s)
        { stage = s; }

    /**
     * Returns this pipeline in descriptor syntax.
     *
     * @exception IllegalArgumentException if this is the last element
     *  of a chain and has a caller-provided consumer, which can't be
     *  shown in descriptor syntax
     */
    public String toString ()
    {
        if (rest == null && next == null)
            return stage.toString ();
        if (rest != null)
            return stage + " | " + rest;
        throw new IllegalArgumentException ("next");
    }

    /**
     * Creates the consumers for this stage and all the stages after
     * it, working from the end of the pipeline back to its head.
     *
     * @return the consumer for this element's stage; repeated calls
     *  return the same object
     * @exception IOException if some stage can't be created
     */
    EventConsumer createPipeline ()
    throws IOException
    {
        if (built == null) {
            // the last stage feeds the caller's consumer (if any);
            // every other stage feeds the stage following it
            EventConsumer downstream = (rest == null)
                ? next
                : rest.createPipeline ();

            built = stage.createStage (downstream);
        }
        return built;
    }
}
/*
public static void main (String argv [])
{
try {
// three basic terminus cases
createPipeline ("null");
createPipeline ("validate");
createPipeline ("write ( stdout )");
// four basic filters
createPipeline ("nsfix | write ( stderr )");
createPipeline ("wf | null");
createPipeline ("null | null");
createPipeline (
"call ( http://www.example.com/services/xml-1a ) | xhtml ( stdout )");
// tee junctions
createPipeline ("tee ( validate ) | write ( stdout )");
createPipeline ("tee ( nsfix | write ( stdout ) ) | validate");
// longer pipeline
createPipeline ("nsfix | tee ( validate ) | write ( stdout )");
createPipeline (
"null | wf | nsfix | tee ( validate ) | write ( stdout )");
// try some parsing error cases
try {
createPipeline ("null ("); // extra token '('
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("nsfix |"); // extra token '|'
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("xhtml ( foo"); // missing right paren
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("xhtml ( foo bar"); // required right paren
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("tee ( nsfix | validate");// missing right paren
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
// try some construction error cases
try {
createPipeline ("call"); // missing param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("call ( foobar )"); // broken param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("nsfix ( foobar )"); // illegal param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("null ( foobar )"); // illegal param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("wf ( foobar )"); // illegal param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("xhtml ( foobar.html )");
new File ("foobar.html").delete ();
// now supported
} catch (Exception e) {
System.err.println ("** err: " + e.getMessage ()); }
try {
createPipeline ("xhtml"); // missing param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("write ( stdout ) | null"); // nonterminal
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("validate | null");
// now supported
} catch (Exception e) {
System.err.println ("** err: " + e.getMessage ()); }
try {
createPipeline ("validate ( foo )"); // illegal param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
createPipeline ("tee"); // missing param
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
try {
// only builtins so far
createPipeline ("com.example.xml.FilterClass");
System.err.println ("** didn't report error");
} catch (Exception e) {
System.err.println ("== err: " + e.getMessage ()); }
} catch (Exception e) {
e.printStackTrace ();
}
}
/**/
}
|