Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/selfContained/QueryDocumentStream.java@ 3615

Last change on this file since 3615 was 3615, checked in by say1, 21 years ago
java updates new collections now appear on web server
Property svn:keywords set to `Author Date Id Revision`
File size: 9.2 KB

Line
1	/*
2	* QueryDocumentStream.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.selfContained;
20
21	// XML classes
22	import javax.xml.transform.Source;
23	import javax.xml.transform.Transformer;
24	import javax.xml.transform.TransformerConfigurationException;
25	import javax.xml.transform.TransformerException;
26	import javax.xml.transform.TransformerFactory;
27	import javax.xml.transform.dom.DOMResult;
28	import javax.xml.transform.dom.DOMSource;
29	import javax.xml.transform.stream.StreamResult;
30	import javax.xml.transform.stream.StreamSource;
31	import org.w3c.dom.Document;
32
33	// other java classes
34	import java.io.StringReader;
35
36	/**
37	* QueryDocumentStream takes another document stream and removes
38	* from the stream all documents which fail to match a XSLT query.
39	* The default query returns only those documents containing the
40	* string "the".
41	*
42	* @author <a href="http://www.cs.waikato.ac.nz/~say1/">stuart yeates</a> (<a href="mailto:[email protected]">s
43	[email protected]</a>) at the <a href="http://www.nzdl.org">New Zealand Digital Library</a>
44	* @version $Revision: 3615 $
45	* @see DocumentStream
46	*
47	*/
48	public class QueryDocumentStream implements DocumentStream {
49
50	/** The default XSLT query string. Is true (returns a non-empty document) when the input document contains the string "the". */
51	public static final String DEFAULT_QUERY_STRING =
52	"<xsl:stylesheet version=\"1.0\" " +
53	" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">" +
54	" <xsl:output omit-xml-declaration=\"yes\"/>" +
55
56	" <xsl:template match=\"/*\">" +
57	" <xsl:if test=\"contains(.,'the')\">" +
58	" <xsl:copy-of select=\".\"/>" +
59	" </xsl:if>" +
60	" </xsl:template>" +
61	"</xsl:stylesheet>";
62	/** The first part of the default quesy for a string other than "the" */
63	public static final String DEFAULT_QUERY_STRING_1_1 =
64	"<xsl:stylesheet version=\"1.0\" " +
65	" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">" +
66	" <xsl:output omit-xml-declaration=\"yes\"/>" +
67
68	" <xsl:template match=\"/*\">" +
69	" <xsl:if test=\"contains(.,'";
70
71	/** The second part of the default quesy for a string other than "the" */
72	public static final String DEFAULT_QUERY_STRING_1_2 =
73	"')\">" +
74	" <xsl:copy-of select=\".\"/>" +
75	" </xsl:if>" +
76	" </xsl:template>" +
77	"</xsl:stylesheet>";
78
79
80	/** The query */
81	protected StreamSource query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING));
82
83	/** The underlying DocumentStream object */
84	protected DocumentStream stream = null;
85
86	/** A cached Document */
87	protected Document cached = null;
88
89	/** The transformer factory */
90	TransformerFactory transformerFactory = null;
91
92	/** The transformer */
93	Transformer transformer = null;
94
95	/** Uses the default query (looking for the) over the default DocumentStream (all ascii strings) */
96	public QueryDocumentStream() {
97	this.query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING));
98	this.stream = new GeneratedDocumentStream();
99
100	try {
101	this.transformerFactory
102	= org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
103
104	this.transformer
105	= transformerFactory.newTransformer(query);
106	} catch (TransformerConfigurationException e) {
107	System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
108	}
109	}
110
111	/** Looks for the given string in the stream of documents */
112	public QueryDocumentStream(DocumentStream stream, String query) {
113	String fullQuery = DEFAULT_QUERY_STRING_1_1 + query + DEFAULT_QUERY_STRING_1_2;
114	this.query = new StreamSource(new StringReader(fullQuery));
115	this.stream = stream;
116
117	try {
118	this.transformerFactory
119	= org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
120
121	this.transformer
122	= transformerFactory.newTransformer(this.query);
123	} catch (TransformerConfigurationException e) {
124	System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
125	}
126	}
127
128
129	/** Applies the stylesheet to the stream. */
130	public QueryDocumentStream(DocumentStream stream, StreamSource source) {
131	this.query = source;
132	this.stream = stream;
133
134	try {
135	this.transformerFactory
136	= org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
137
138	this.transformer
139	= transformerFactory.newTransformer(query);
140	} catch (TransformerConfigurationException e) {
141	System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
142	}
143	}
144
145
146	/** The default constructor. Searches for documents containing "the" */
147	public QueryDocumentStream(DocumentStream stream) {
148	this.query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING));
149	this.stream = stream;
150
151	try {
152	this.transformerFactory
153	= org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
154
155	this.transformer
156	= transformerFactory.newTransformer(query);
157	} catch (TransformerConfigurationException e) {
158	System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
159	}
160	}
161	/**
162	* Generates the nest document in the sequence
163	*
164	* @exception java.io.Exception when something goes wrong in the XML stuff
165	* @return the next Document
166	* @see org.w3c.dom.Document
167	*/
168	public Document nextDocument() throws Exception {
169
170	if (hasNextDocument() == false)
171	throw new Error("no more docs");
172	Document result = cached;
173	cached = null;
174	return result;
175	}
176	/**
177	* Are there more documents ?
178	*
179	* @return true if there are more docs
180	*/
181	public boolean hasNextDocument() throws Exception {
182	lookInStream();
183	return (cached != null);
184	}
185	/**
186	*
187	*
188	* @exception java.io.IOException when ...
189	* @param arg2 ...
190	* @param arg1 ...
191	* @return ...
192	* @see blank
193	* @see blank
194	*/
195	protected boolean lookInStream() throws Exception {
196
197	while (stream.hasNextDocument() && cached == null) {
198
199	Document candidate = stream.nextDocument();
200
201	DOMSource input = new DOMSource(candidate);
202	DOMResult output = new DOMResult();
203
204	try {
205
206	transformer.transform(input, output);
207
208	// if the transformed document is not empty don't cache it
209	if (output.getNode().getFirstChild() != null)
210	cached = (Document) output.getNode();
211
212	} catch (TransformerConfigurationException e) {
213	System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
214	} catch (TransformerException e) {
215	System.err.println("XMLTransformer: couldn't transform the source: " + e.getMessage());
216	cached = null;
217	return false;
218	} catch (Exception e) {
219	System.err.println("Exception: " + e.getMessage());
220	cached = null;
221	return false;
222	}
223
224	if (false) { // debugging info
225
226	try {
227	StreamResult result = new StreamResult(System.out);
228	Transformer transformer = transformerFactory.newTransformer();
229
230	Source source = new DOMSource(candidate);
231	transformer.transform(source,result);
232
233	System.out.print(" ==> ");
234
235	if (cached != null) {
236	Source source2 = new DOMSource(cached);
237	transformer.transform(source2,result);
238	}
239	System.out.println();
240	System.out.println("======================================================");
241
242	} catch (TransformerConfigurationException e) {
243	System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
244	} catch (TransformerException e) {
245	System.err.println("XMLTransformer: couldn't transform the source: " + e.getMessage());
246	cached = null;
247	return false;
248	} catch (Exception e) {
249	System.err.println("Exception: " + e.getMessage());
250	cached = null;
251	return false;
252	}
253
254	}
255	}
256	return cached == null;
257	}
258
259	/**
260	* Generates an unbounded stream of documents
261	*/
262	public static void main(String args[]) throws Exception
263	{
264	TransformerFactory transformerFactory = TransformerFactory.newInstance();
265	Transformer transformer = transformerFactory.newTransformer();
266
267	GeneratedDocumentStream gStream = new GeneratedDocumentStream();
268	QueryDocumentStream qStream = new QueryDocumentStream(gStream);
269	while (qStream.hasNextDocument()) {
270	Document document = qStream.nextDocument();
271
272	StreamResult result = new StreamResult(System.out);
273
274	Source source = new DOMSource(document);
275	transformer.transform(source,result);
276
277	System.out.println();
278	System.out.println("======================================================");
279	}
280	}
281
282	}
283

Note: See TracBrowser for help on using the repository browser.

Download in other formats: