/* * QueryDocumentStream.java * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package org.greenstone.gsdl3.selfContained; // XML classes import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMResult; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import org.w3c.dom.Document; // other java classes import java.io.StringReader; /** * QueryDocumentStream takes another document stream and removes * from the stream all documents which fail to match a XSLT query. * The default query returns only those documents containing the * string "the". * * @author stuart yeates (s .yeates@cs.waikato.ac.nz) at the New Zealand Digital Library * @version $Revision: 5663 $ * @see DocumentStream * */ public class QueryDocumentStream implements DocumentStream { /** The default XSLT query string. Is true (returns a non-empty document) when the input document contains the string "the". */ public static final String DEFAULT_QUERY_STRING = "" + " " + " " + " " + " " + " " + " " + ""; /** The first part of the default quesy for a string other than "the" */ public static final String DEFAULT_QUERY_STRING_1_1 = "" + " " + " " + " " + " " + " " + " " + ""; /** The query */ protected StreamSource query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING)); /** The underlying DocumentStream object */ protected DocumentStream stream = null; /** A cached Document */ protected Document cached = null; /** The transformer factory */ TransformerFactory transformerFactory = null; /** The transformer */ Transformer transformer = null; /** Uses the default query (looking for the) over the default DocumentStream (all ascii strings) */ public QueryDocumentStream() { this.query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING)); this.stream = new GeneratedDocumentStream(); try { this.transformerFactory = org.apache.xalan.processor.TransformerFactoryImpl.newInstance(); this.transformer = transformerFactory.newTransformer(query); } catch (TransformerConfigurationException e) { System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage()); } } /** Looks for the given string in the stream of documents */ public QueryDocumentStream(DocumentStream stream, String query) { String fullQuery = DEFAULT_QUERY_STRING_1_1 + query + DEFAULT_QUERY_STRING_1_2; this.query = new StreamSource(new StringReader(fullQuery)); this.stream = stream; try { this.transformerFactory = org.apache.xalan.processor.TransformerFactoryImpl.newInstance(); this.transformer = transformerFactory.newTransformer(this.query); } catch (TransformerConfigurationException e) { System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage()); } } /** Applies the stylesheet to the stream. */ public QueryDocumentStream(DocumentStream stream, StreamSource source) { this.query = source; this.stream = stream; try { this.transformerFactory = org.apache.xalan.processor.TransformerFactoryImpl.newInstance(); this.transformer = transformerFactory.newTransformer(query); } catch (TransformerConfigurationException e) { System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage()); } } /** The default constructor. Searches for documents containing "the" */ public QueryDocumentStream(DocumentStream stream) { this.query = new StreamSource(new StringReader(DEFAULT_QUERY_STRING)); this.stream = stream; try { this.transformerFactory = org.apache.xalan.processor.TransformerFactoryImpl.newInstance(); this.transformer = transformerFactory.newTransformer(query); } catch (TransformerConfigurationException e) { System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage()); } } /** * Generates the nest document in the sequence * * @exception java.io.Exception when something goes wrong in the XML stuff * @return the next Document * @see org.w3c.dom.Document */ public Document nextDocument() throws Exception { if (hasNextDocument() == false) throw new Error("no more docs"); Document result = cached; cached = null; return result; } /** * Are there more documents ? * * @return true if there are more docs */ public boolean hasNextDocument() throws Exception { lookInStream(); return (cached != null); } /** * * * @exception java.io.IOException when ... * @param arg2 ... * @param arg1 ... * @return ... */ protected boolean lookInStream() throws Exception { while (stream.hasNextDocument() && cached == null) { Document candidate = stream.nextDocument(); DOMSource input = new DOMSource(candidate); DOMResult output = new DOMResult(); try { transformer.transform(input, output); // if the transformed document is not empty don't cache it if (output.getNode().getFirstChild() != null) cached = (Document) output.getNode(); } catch (TransformerConfigurationException e) { System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage()); } catch (TransformerException e) { System.err.println("XMLTransformer: couldn't transform the source: " + e.getMessage()); cached = null; return false; } catch (Exception e) { System.err.println("Exception: " + e.getMessage()); cached = null; return false; } if (false) { // debugging info try { StreamResult result = new StreamResult(System.out); Transformer transformer = transformerFactory.newTransformer(); Source source = new DOMSource(candidate); transformer.transform(source,result); System.out.print(" ==> "); if (cached != null) { Source source2 = new DOMSource(cached); transformer.transform(source2,result); } System.out.println(); System.out.println("======================================================"); } catch (TransformerConfigurationException e) { System.err.println("XMLTransformer: couldn't create transformer object: "+e.getMessage()); } catch (TransformerException e) { System.err.println("XMLTransformer: couldn't transform the source: " + e.getMessage()); cached = null; return false; } catch (Exception e) { System.err.println("Exception: " + e.getMessage()); cached = null; return false; } } } return cached == null; } /** * Generates an unbounded stream of documents */ public static void main(String args[]) throws Exception { TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = transformerFactory.newTransformer(); GeneratedDocumentStream gStream = new GeneratedDocumentStream(); QueryDocumentStream qStream = new QueryDocumentStream(gStream); while (qStream.hasNextDocument()) { Document document = qStream.nextDocument(); StreamResult result = new StreamResult(System.out); Source source = new DOMSource(document); transformer.transform(source,result); System.out.println(); System.out.println("======================================================"); } } }