source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/selfContained/FilesDocumentStream.java@ 25635

Last change on this file since 25635 was 25635, checked in by sjm84, 12 years ago

Fixing Greenstone 3's use (or lack thereof) of generics. This was done automatically, so we may want to change it over time. This change will also auto-format any files that have not already been formatted.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.9 KB
Line 
1/*
2 * FilesDocumentStream.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.selfContained;
20
21// XML classes
22import org.w3c.dom.Document;
23import org.xml.sax.InputSource;
24import javax.xml.transform.Source;
25import javax.xml.transform.Transformer;
26import javax.xml.transform.TransformerFactory;
27import javax.xml.transform.dom.DOMSource;
28import javax.xml.transform.stream.StreamResult;
29import org.apache.xerces.dom.DocumentImpl;
30import org.apache.xerces.dom.ElementImpl;
31import org.apache.xerces.dom.TextImpl;
32import org.w3c.dom.Document;
33
34// other java classes
35import java.io.File;
36import java.io.FilenameFilter;
37import java.io.FileReader;
38import java.io.IOException;
39import java.io.Reader;
40import java.util.Vector;
41
42
43
44/**
45 * DocumentStreamFromFiles creates a stream of documents from a directory or directories.
46 *
47 * @author <a href="http://www.cs.waikato.ac.nz/~say1/">stuart yeates</a> (<a href="mailto:[email protected]">s
48[email protected]</a>) at the <a href="http://www.nzdl.org">New Zealand Digital Library</a>
49 * @version $Revision: 25635 $
50 * @see java.io.File
51 * @see DocumentStream
52 */
53public class FilesDocumentStream
54 implements DocumentStream {
55 /** the files we've found so far */
56 Vector<String> files = new Vector<String>();
57 /** the directories we've found so far */
58 Vector<String> directories = new Vector<String>();
59
60 /**
61 * Filename constructor
62 *
63 * @exception java.io.IOException file access do
64 * @param filename the filename of the file or directory
65 */
66 public FilesDocumentStream(String filename) {
67 File file = new File(filename);
68 if (file.exists() && file.isDirectory())
69 directories.add(file.getAbsolutePath());
70 else
71 files.add(file.getAbsolutePath());
72 }
73 /**
74 * File constructor
75 *
76 * @exception java.io.IOException file access do
77 * @param file a handle to the file
78 */
79 public FilesDocumentStream(File file) {
80 if (file.exists() && file.isDirectory())
81 directories.add(file.getAbsolutePath());
82 else
83 files.add(file.getAbsolutePath());
84 }
85
86 /**
87 * A small inner class to filter out those files that don't
88 * end in ".xml" or ".XML"
89 */
90 class XMLFilenameFilter implements FilenameFilter {
91 /** Tests if a specified file should be included in a file list. */
92 public boolean accept(File dir, String name) {
93 String target = ".xml";
94 if (name.length() < target.length())
95 return false;
96 String extension = name.substring(name.length() - 4, name.length());
97 //System.out.println(name + " ==>> " + extension);
98 if (extension.equalsIgnoreCase(target)) {
99 return true;
100 } else {
101 return false;
102 }
103 }
104 }
105
106 /**
107 * Opens a new directory if there are no files are queued
108 *
109 * @exception java.io.IOException when the underlying file access does
110 */
111 protected void expandIfNecessary() {
112 while (directories.size() > 0) {
113 String dirname = directories.elementAt(0);
114 directories.removeElement(dirname);
115 File dir = new File(dirname);
116 if (!dir.exists()) throw new Error ("error in expand: expecting a directory " + dirname);
117 String[] fileArray = dir.list();
118 for (int i=0;i<fileArray.length;i++){
119 File file = new File(dir, fileArray[i]);
120 if (file.exists() && file.isDirectory())
121 directories.add(file.getPath());
122 }
123
124 fileArray = dir.list(new XMLFilenameFilter());
125 for (int i=0;i<fileArray.length;i++){
126 File file = new File(dir, fileArray[i]);
127 if (file.exists() && !file.isDirectory())
128 files.add(file.getPath());
129 }
130 if (files.size() > 0)
131 return;
132 }
133 }
134
135 /**
136 * Returns the next document
137 *
138 * @exception java.io.IOException when the underlying file access does
139 * @return the next document
140 */
141 public Document nextDocument()
142 throws Exception {
143
144 if (!hasNextDocument()) throw new Error("Doesn't have another Document");
145
146 String filename = files.elementAt(files.size() - 1);
147 files.removeElementAt(files.size() - 1);
148 File file = new File(filename);
149
150 Reader reader = new FileReader(file);
151 InputSource xml_source = new InputSource(reader);
152
153 XMLUtil.getDOMParser().parse(xml_source);
154 Document doc = XMLUtil.getDOMParser().getDocument();
155
156 return doc;
157 }
158
159 /**
160 * Is there another document ?
161 *
162 * @exception java.io.IOException when the underlying file access does
163 * @return the next document
164 */
165
166 public boolean hasNextDocument() throws Exception {
167 expandIfNecessary();
168 return (files.size() > 0);
169 }
170 /**
171 * Tests...
172 *
173 *
174 * @exception java.io.IOException when ...
175 * @param args the arguments ...
176 */
177
178 public static void main(String args[]) throws Exception
179 {
180
181 StreamResult result = new StreamResult(System.out);
182 TransformerFactory transformerFactory = TransformerFactory.newInstance();
183
184 FilesDocumentStream stream = new FilesDocumentStream(".");
185 while (stream.hasNextDocument()) {
186 Document document = stream.nextDocument();
187 Transformer transformer = transformerFactory.newTransformer();
188 Source source = new DOMSource(document);
189 transformer.transform(source,result);
190 System.out.println();
191 System.out.println();
192 }
193
194 }
195}
196
Note: See TracBrowser for help on using the repository browser.