source: trunk/gsdl3/src/java/org/greenstone/gsdl3/selfContained/QueryDocumentStream.java@ 3615

Last change on this file since 3615 was 3615, checked in by say1, 21 years ago

java updates new collections now appear on web server

  • Property svn:keywords set to Author Date Id Revision
File size: 9.2 KB
Line 
1/*
2 * QueryDocumentStream.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.selfContained;
20
21// XML classes
22import javax.xml.transform.Source;
23import javax.xml.transform.Transformer;
24import javax.xml.transform.TransformerConfigurationException;
25import javax.xml.transform.TransformerException;
26import javax.xml.transform.TransformerFactory;
27import javax.xml.transform.dom.DOMResult;
28import javax.xml.transform.dom.DOMSource;
29import javax.xml.transform.stream.StreamResult;
30import javax.xml.transform.stream.StreamSource;
31import org.w3c.dom.Document;
32
33// other java classes
34import java.io.StringReader;
35
36/**
37 * QueryDocumentStream takes another document stream and removes
38 * from the stream all documents which fail to match an XSLT query.
39 * The default query returns only those documents containing the
40 * string "the".
41 *
42 * @author <a href="http://www.cs.waikato.ac.nz/~say1/">stuart yeates</a> (<a href="mailto:[email protected]">s
43[email protected]</a>) at the <a href="http://www.nzdl.org">New Zealand Digital Library</a>
44 * @version $Revision: 3615 $
45 * @see DocumentStream
46 *
47 */
48public class QueryDocumentStream implements DocumentStream {
49
50 /** The default XSLT query string. Is true (returns a non-empty document) when the input document contains the string "the". */
51 public static final String DEFAULT_QUERY_STRING =
52 "<xsl:stylesheet version=\"1.0\" " +
53 " xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">" +
54 " <xsl:output omit-xml-declaration=\"yes\"/>" +
55
56 " <xsl:template match=\"/*\">" +
57 " <xsl:if test=\"contains(.,'the')\">" +
58 " <xsl:copy-of select=\".\"/>" +
59 " </xsl:if>" +
60 " </xsl:template>" +
61 "</xsl:stylesheet>";
62 /** The first part of the default quesy for a string other than "the" */
63 public static final String DEFAULT_QUERY_STRING_1_1 =
64 "<xsl:stylesheet version=\"1.0\" " +
65 " xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">" +
66 " <xsl:output omit-xml-declaration=\"yes\"/>" +
67
68 " <xsl:template match=\"/*\">" +
69 " <xsl:if test=\"contains(.,'";
70
71 /** The second part of the default quesy for a string other than "the" */
72 public static final String DEFAULT_QUERY_STRING_1_2 =
73 "')\">" +
74 " <xsl:copy-of select=\".\"/>" +
75 " </xsl:if>" +
76 " </xsl:template>" +
77 "</xsl:stylesheet>";
78
79
80 /** The query */
81 protected StreamSource query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING));
82
83 /** The underlying DocumentStream object */
84 protected DocumentStream stream = null;
85
86 /** A cached Document */
87 protected Document cached = null;
88
89 /** The transformer factory */
90 TransformerFactory transformerFactory = null;
91
92 /** The transformer */
93 Transformer transformer = null;
94
95 /** Uses the default query (looking for the) over the default DocumentStream (all ascii strings) */
96 public QueryDocumentStream() {
97 this.query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING));
98 this.stream = new GeneratedDocumentStream();
99
100 try {
101 this.transformerFactory
102 = org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
103
104 this.transformer
105 = transformerFactory.newTransformer(query);
106 } catch (TransformerConfigurationException e) {
107 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
108 }
109 }
110
111 /** Looks for the given string in the stream of documents */
112 public QueryDocumentStream(DocumentStream stream, String query) {
113 String fullQuery = DEFAULT_QUERY_STRING_1_1 + query + DEFAULT_QUERY_STRING_1_2;
114 this.query = new StreamSource(new StringReader(fullQuery));
115 this.stream = stream;
116
117 try {
118 this.transformerFactory
119 = org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
120
121 this.transformer
122 = transformerFactory.newTransformer(this.query);
123 } catch (TransformerConfigurationException e) {
124 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
125 }
126 }
127
128
129 /** Applies the stylesheet to the stream. */
130 public QueryDocumentStream(DocumentStream stream, StreamSource source) {
131 this.query = source;
132 this.stream = stream;
133
134 try {
135 this.transformerFactory
136 = org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
137
138 this.transformer
139 = transformerFactory.newTransformer(query);
140 } catch (TransformerConfigurationException e) {
141 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
142 }
143 }
144
145
146 /** The default constructor. Searches for documents containing "the" */
147 public QueryDocumentStream(DocumentStream stream) {
148 this.query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING));
149 this.stream = stream;
150
151 try {
152 this.transformerFactory
153 = org.apache.xalan.processor.TransformerFactoryImpl.newInstance();
154
155 this.transformer
156 = transformerFactory.newTransformer(query);
157 } catch (TransformerConfigurationException e) {
158 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
159 }
160 }
161 /**
162 * Generates the nest document in the sequence
163 *
164 * @exception java.io.Exception when something goes wrong in the XML stuff
165 * @return the next Document
166 * @see org.w3c.dom.Document
167 */
168 public Document nextDocument() throws Exception {
169
170 if (hasNextDocument() == false)
171 throw new Error("no more docs");
172 Document result = cached;
173 cached = null;
174 return result;
175 }
176 /**
177 * Are there more documents ?
178 *
179 * @return true if there are more docs
180 */
181 public boolean hasNextDocument() throws Exception {
182 lookInStream();
183 return (cached != null);
184 }
185 /**
186 *
187 *
188 * @exception java.io.IOException when ...
189 * @param arg2 ...
190 * @param arg1 ...
191 * @return ...
192 * @see blank
193 * @see blank
194 */
195 protected boolean lookInStream() throws Exception {
196
197 while (stream.hasNextDocument() && cached == null) {
198
199 Document candidate = stream.nextDocument();
200
201 DOMSource input = new DOMSource(candidate);
202 DOMResult output = new DOMResult();
203
204 try {
205
206 transformer.transform(input, output);
207
208 // if the transformed document is not empty don't cache it
209 if (output.getNode().getFirstChild() != null)
210 cached = (Document) output.getNode();
211
212 } catch (TransformerConfigurationException e) {
213 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
214 } catch (TransformerException e) {
215 System.err.println("XMLTransformer: couldn't transform the source: " + e.getMessage());
216 cached = null;
217 return false;
218 } catch (Exception e) {
219 System.err.println("Exception: " + e.getMessage());
220 cached = null;
221 return false;
222 }
223
224 if (false) { // debugging info
225
226 try {
227 StreamResult result = new StreamResult(System.out);
228 Transformer transformer = transformerFactory.newTransformer();
229
230 Source source = new DOMSource(candidate);
231 transformer.transform(source,result);
232
233 System.out.print(" ==> ");
234
235 if (cached != null) {
236 Source source2 = new DOMSource(cached);
237 transformer.transform(source2,result);
238 }
239 System.out.println();
240 System.out.println("======================================================");
241
242 } catch (TransformerConfigurationException e) {
243 System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage());
244 } catch (TransformerException e) {
245 System.err.println("XMLTransformer: couldn't transform the source: " + e.getMessage());
246 cached = null;
247 return false;
248 } catch (Exception e) {
249 System.err.println("Exception: " + e.getMessage());
250 cached = null;
251 return false;
252 }
253
254 }
255 }
256 return cached == null;
257 }
258
259 /**
260 * Generates an unbounded stream of documents
261 */
262 public static void main(String args[]) throws Exception
263 {
264 TransformerFactory transformerFactory = TransformerFactory.newInstance();
265 Transformer transformer = transformerFactory.newTransformer();
266
267 GeneratedDocumentStream gStream = new GeneratedDocumentStream();
268 QueryDocumentStream qStream = new QueryDocumentStream(gStream);
269 while (qStream.hasNextDocument()) {
270 Document document = qStream.nextDocument();
271
272 StreamResult result = new StreamResult(System.out);
273
274 Source source = new DOMSource(document);
275 transformer.transform(source,result);
276
277 System.out.println();
278 System.out.println("======================================================");
279 }
280 }
281
282}
283
Note: See TracBrowser for help on using the repository browser.