source: trunk/gsdl3/src/java/org/greenstone/gsdl3/selfContained/QueryDocumentStream.java@ 5663

Last change on this file since 5663 was 5663, checked in by kjdon, 21 years ago

fixed up some bad javadoc

  • Property svn:keywords set to Author Date Id Revision
File size: 9.2 KB
Line 
1/*
2 * QueryDocumentStream.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.selfContained;
20
21// XML classes
22import javax.xml.transform.Source;
23import javax.xml.transform.Transformer;
24import javax.xml.transform.TransformerConfigurationException;
25import javax.xml.transform.TransformerException;
26import javax.xml.transform.TransformerFactory;
27import javax.xml.transform.dom.DOMResult;
28import javax.xml.transform.dom.DOMSource;
29import javax.xml.transform.stream.StreamResult;
30import javax.xml.transform.stream.StreamSource;
31import org.w3c.dom.Document;
32
33// other java classes
34import java.io.StringReader;
35
36/**
37 * QueryDocumentStream takes another document stream and removes
38 * from the stream all documents which fail to match a XSLT query.
39 * The default query returns only those documents containing the
40 * string "the".
41 *
42 * @author <a href="http://www.cs.waikato.ac.nz/~say1/">stuart yeates</a> (<a href="mailto:[email protected]">s
43[email protected]</a>) at the <a href="http://www.nzdl.org">New Zealand Digital Library</a>
44 * @version $Revision: 5663 $
45 * @see DocumentStream
46 *
47 */
48public class QueryDocumentStream implements DocumentStream {
49
50 /** The default XSLT query string. Is true (returns a non-empty document) when the input document contains the string "the". */
51 public static final String DEFAULT_QUERY_STRING =
52 "<xsl:stylesheet version=\"1.0\" " +
53 " xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">" +
54 " <xsl:output omit-xml-declaration=\"yes\"/>" +
55
56 " <xsl:template match=\"/*\">" +
57 " <xsl:if test=\"contains(.,'the')\">" +
58 " <xsl:copy-of select=\".\"/>" +
59 " </xsl:if>" +
60 " </xsl:template>" +
61 "</xsl:stylesheet>";
62 /** The first part of the default quesy for a string other than "the" */
63 public static final String DEFAULT_QUERY_STRING_1_1 =
64 "<xsl:stylesheet version=\"1.0\" " +
65 " xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">" +
66 " <xsl:output omit-xml-declaration=\"yes\"/>" +
67
68 " <xsl:template match=\"/*\">" +
69 " <xsl:if test=\"contains(.,'";
70
71 /** The second part of the default quesy for a string other than "the" */
72 public static final String DEFAULT_QUERY_STRING_1_2 =
73 "')\">" +
74 " <xsl:copy-of select=\".\"/>" +
75 " </xsl:if>" +
76 " </xsl:template>" +
77 "</xsl:stylesheet>";
78
79
80 /** The query */
81 protected StreamSource query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING));
82
83 /** The underlying DocumentStream object */
84 protected DocumentStream stream = null;
85
86 /** A cached Document */
87 protected Document cached = null;
88
89 /** The transformer factory */
90 TransformerFactory transformerFactory = null;
91
92 /** The transformer */
93 Transformer transformer = null;
94
95 /** Uses the default query (looking for the) over the default DocumentStream (all ascii strings) */
96 public QueryDocumentStream() {
97 this.query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING));
98 this.stream = new GeneratedDocumentStream();
99
100 try {
101 this.transformerFactory
102 = org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
103
104 this.transformer
105 = transformerFactory.newTransformer(query);
106 } catch (TransformerConfigurationException e) {
107 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
108 }
109 }
110
111 /** Looks for the given string in the stream of documents */
112 public QueryDocumentStream(DocumentStream stream, String query) {
113 String fullQuery = DEFAULT_QUERY_STRING_1_1 + query + DEFAULT_QUERY_STRING_1_2;
114 this.query = new StreamSource(new StringReader(fullQuery));
115 this.stream = stream;
116
117 try {
118 this.transformerFactory
119 = org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
120
121 this.transformer
122 = transformerFactory.newTransformer(this.query);
123 } catch (TransformerConfigurationException e) {
124 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
125 }
126 }
127
128
129 /** Applies the stylesheet to the stream. */
130 public QueryDocumentStream(DocumentStream stream, StreamSource source) {
131 this.query = source;
132 this.stream = stream;
133
134 try {
135 this.transformerFactory
136 = org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
137
138 this.transformer
139 = transformerFactory.newTransformer(query);
140 } catch (TransformerConfigurationException e) {
141 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
142 }
143 }
144
145
146 /** The default constructor. Searches for documents containing "the" */
147 public QueryDocumentStream(DocumentStream stream) {
148 this.query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING));
149 this.stream = stream;
150
151 try {
152 this.transformerFactory
153 = org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
154
155 this.transformer
156 = transformerFactory.newTransformer(query);
157 } catch (TransformerConfigurationException e) {
158 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
159 }
160 }
161 /**
162 * Generates the nest document in the sequence
163 *
164 * @exception java.io.Exception when something goes wrong in the XML stuff
165 * @return the next Document
166 * @see org.w3c.dom.Document
167 */
168 public Document nextDocument() throws Exception {
169
170 if (hasNextDocument() == false)
171 throw new Error("no more docs");
172 Document result = cached;
173 cached = null;
174 return result;
175 }
176 /**
177 * Are there more documents ?
178 *
179 * @return true if there are more docs
180 */
181 public boolean hasNextDocument() throws Exception {
182 lookInStream();
183 return (cached != null);
184 }
185 /**
186 *
187 *
188 * @exception java.io.IOException when ...
189 * @param arg2 ...
190 * @param arg1 ...
191 * @return ...
192 */
193 protected boolean lookInStream() throws Exception {
194
195 while (stream.hasNextDocument() && cached == null) {
196
197 Document candidate = stream.nextDocument();
198
199 DOMSource input = new DOMSource(candidate);
200 DOMResult output = new DOMResult();
201
202 try {
203
204 transformer.transform(input, output);
205
206 // if the transformed document is not empty don't cache it
207 if (output.getNode().getFirstChild() != null)
208 cached = (Document) output.getNode();
209
210 } catch (TransformerConfigurationException e) {
211 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
212 } catch (TransformerException e) {
213 System.err.println("XMLTransformer: couldn't transform the source: " + e.getMessage());
214 cached = null;
215 return false;
216 } catch (Exception e) {
217 System.err.println("Exception: " + e.getMessage());
218 cached = null;
219 return false;
220 }
221
222 if (false) { // debugging info
223
224 try {
225 StreamResult result = new StreamResult(System.out);
226 Transformer transformer = transformerFactory.newTransformer();
227
228 Source source = new DOMSource(candidate);
229 transformer.transform(source,result);
230
231 System.out.print(" ==> ");
232
233 if (cached != null) {
234 Source source2 = new DOMSource(cached);
235 transformer.transform(source2,result);
236 }
237 System.out.println();
238 System.out.println("======================================================");
239
240 } catch (TransformerConfigurationException e) {
241 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
242 } catch (TransformerException e) {
243 System.err.println("XMLTransformer: couldn't transform the source: " + e.getMessage());
244 cached = null;
245 return false;
246 } catch (Exception e) {
247 System.err.println("Exception: " + e.getMessage());
248 cached = null;
249 return false;
250 }
251
252 }
253 }
254 return cached == null;
255 }
256
257 /**
258 * Generates an unbounded stream of documents
259 */
260 public static void main(String args[]) throws Exception
261 {
262 TransformerFactory transformerFactory = TransformerFactory.newInstance();
263 Transformer transformer = transformerFactory.newTransformer();
264
265 GeneratedDocumentStream gStream = new GeneratedDocumentStream();
266 QueryDocumentStream qStream = new QueryDocumentStream(gStream);
267 while (qStream.hasNextDocument()) {
268 Document document = qStream.nextDocument();
269
270 StreamResult result = new StreamResult(System.out);
271
272 Source source = new DOMSource(document);
273 transformer.transform(source,result);
274
275 System.out.println();
276 System.out.println("======================================================");
277 }
278 }
279
280}
281
Note: See TracBrowser for help on using the repository browser.