source: trunk/gli/src/org/greenstone/gatherer/msm/GDMParser.java@ 6318

Last change on this file since 6318 was 5589, checked in by mdewsnip, 21 years ago

Nearly finished adding tooltips (and thank goodness for that).

  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.msm;
38
39/**************************************************************************************
40 * Title: Gatherer
41 * Description: The Gatherer: a tool for gathering and enriching a digital collection.
42 * Company: The University of Waikato
43 * Written: / /02
44 * Revised: 20/08/02 Commented and Optimized.
45 * @author John Thompson, 9826509
46 * @version 2.3
47 **************************************************************************************/
48import java.io.BufferedReader;
49import java.io.File;
50import java.io.FileInputStream;
51import java.io.InputStream;
52import java.io.InputStreamReader;
53import java.io.Reader;
54import java.lang.IllegalArgumentException;
55import java.lang.ref.SoftReference;
56import java.util.ArrayList;
57import java.util.LinkedHashMap;
58import java.util.Map;
59import org.apache.xerces.parsers.DOMParser;
60import org.apache.xml.serialize.XMLSerializer;
61import org.apache.xml.serialize.OutputFormat;
62import org.w3c.dom.Document;
63import org.xml.sax.InputSource;
64
65/** Parses metadata.xml documents of the GreenstoneDirectoryMetadata variety, caching where possible. */
66// ####################################################################################
67// Optimization Saving
68// ####################################################################################
69// Vector -> ArrayList + Memory, + Processor (pos. - Processor)
70// Hashtable -> HashMap + Memory, + Processor
71// Removed extra global references + Memory (16k)
72// ####################################################################################
73public class GDMParser
74 extends LinkedHashMap {
75 /** A list of file names that we know do not actually belong to valid GDM xml files, so there not much point in ever trying to read them again. */
76 private ArrayList ignore = null;
77 /** The actual xerces parser used to read in xml documents. */
78 private DOMParser parser = null;
79 /** The default maximum cache size if max size not explicitly set. */
80 private int max_size = 25;
81 /** Default constructor. */
82 public GDMParser() {
83 super();
84 this.ignore = new ArrayList();
85 try {
86 parser = new DOMParser();
87 // Don't let it import external dtds. If it does it'll probably spit the dummy. If people try to use a poorly formated xml file more fool them.
88 parser.setFeature("http://xml.org/sax/features/validation", false);
89 parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
90 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
91 parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
92 }
93 catch(Exception error) {
94 ///ystem.err.println("Fatal Error in GDMParser.init(): " + error);
95 error.printStackTrace();
96 System.exit(1);
97 }
98 }
99 /** Constructor with maximum size set.
100 * @param max_size The maximum size of the cache, as an <i>int</i>.
101 */
102 public GDMParser(int max_size) {
103 this();
104 this.max_size = max_size;
105 }
106 /** Destructor, clears cache and remove persistant global references. */
107 public void destroy() {
108 ignore.clear();
109 ignore = null;
110 parser = null;
111 clear();
112 }
113 /** Fetches the document for the given xml file. This may mean (re)parsing it or simply fetching it from cache.
114 * @param file The metadata.xml <strong>File</strong> you wish to get the document for.
115 * @return A <strong>Document</strong> which is sourced from file.
116 */
117 public Document parse(File file) {
118 ///ystem.err.println("Parse: " + file.getAbsolutePath());
119 Document result = null;
120 if(file.exists()) {
121 // Check if we've already parsed this file in an earlier attempt.
122 if(containsKey(file)) {
123 ///ystem.err.println("Already cached previously.");
124 //result = (Document) get(file);
125 SoftReference reference = (SoftReference) get(file);
126 if(reference != null) {
127 result = (Document) reference.get();
128 }
129 else {
130 ///ystem.err.println("Reference expired.");
131 }
132 }
133 // Check the ignore list and see if we've already detected this isn't a greenstone metadata file.
134 if(result == null && !ignore.contains(file)) {
135 ///ystem.err.println("Reparse file.");
136 // Of course we may not have, or it may have expired so...
137 try {
138 // Display progress dialog.
139 InputStream is = new FileInputStream(file);
140 InputStreamReader isr = new InputStreamReader(is);
141 Reader r = new BufferedReader(isr);
142 InputSource isc = new InputSource(r);
143 parser.parse(isc); // Slow.
144 Document document = parser.getDocument();
145 // First test. Check we have a GreenstoneDirectoryMetadata file, or for the older version DirectoryMetadata.
146 if(!document.getDoctype().getName().equals("GreenstoneDirectoryMetadata") && !document.getDoctype().getName().equals("DirectoryMetadata")) {
147 ///ystem.err.println("Adding to ignore list.");
148 // Add to ignore list. Not a gdm file.
149 ignore.add(file);
150 }
151 // Cache document.
152 else {
153 ///ystem.err.println("Adding to cache.");
154 put(file, new SoftReference(document));
155 result = document;
156 }
157 }
158 catch (Exception error) {
159 ///ystem.err.println("Error! " + error);
160 error.printStackTrace();
161 }
162 }
163 else {
164 ///ystem.err.println("File on ignore list.");
165 }
166 }
167 else {
168 ///ystem.err.println("File does not exist!");
169 }
170 return result;
171 }
172 /** Automatically called by the LinkedHashMap object whenever an object is added, to determine whether it should remove the oldest entry.
173 * @param eldest The eldest <strong>Map.Entry</strong> which may mean in terms of age, or in terms of usage.
174 * @return <i>true</i> if the given entry should be removed, <i>false</i> otherwise.
175 */
176 protected boolean removeEldestEntry(Map.Entry eldest) {
177 return size() > max_size;
178 }
179}
Note: See TracBrowser for help on using the repository browser.