/**
 *#########################################################################
 *
 * A component of the Gatherer application, part of the Greenstone digital
 * library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * Author: John Thompson, Greenstone Digital Library, University of Waikato
 *
 * Copyright (C) 1999 New Zealand Digital Library Project
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *########################################################################
 */
27 | package org.greenstone.gatherer.msm.parsers;
|
---|
/**************************************************************************************
 * Written:      ??/??/02
 * Revised:      ??/??/02 - Commented
 *               25/07/03 - Fix to allow any valid greenstone metadata.xml to be
 *                          imported from, not just those that occur within a
 *                          collection. This functionality is accidental and caused
 *                          by GLI attempting to find a collect.cfg to extract
 *                          hierarchy file information from, and failing.
 **************************************************************************************/
33 | import java.io.*;
|
---|
34 | import java.lang.ref.*;
|
---|
35 | import java.net.*;
|
---|
36 | import java.util.*;
|
---|
37 | import java.util.regex.*;
|
---|
38 | import javax.swing.*;
|
---|
39 | import javax.swing.tree.*;
|
---|
40 | import org.greenstone.gatherer.Gatherer;
|
---|
41 | import org.greenstone.gatherer.cdm.CommandTokenizer;
|
---|
42 | import org.greenstone.gatherer.file.FileNode;
|
---|
43 | import org.greenstone.gatherer.gui.WarningDialog;
|
---|
44 | import org.greenstone.gatherer.msm.ElementWrapper;
|
---|
45 | import org.greenstone.gatherer.msm.Metadata;
|
---|
46 | import org.greenstone.gatherer.msm.MetadataParser;
|
---|
47 | import org.greenstone.gatherer.msm.MSMUtils;
|
---|
48 | import org.greenstone.gatherer.util.DecodeHTMLReader;
|
---|
49 | import org.greenstone.gatherer.util.StaticStrings;
|
---|
50 | import org.greenstone.gatherer.util.Utility;
|
---|
51 | import org.greenstone.gatherer.valuetree.GValueModel;
|
---|
52 | import org.greenstone.gatherer.valuetree.GValueNode;
|
---|
53 | import org.w3c.dom.*;
|
---|
/** Provides a metadata parser implementation that knows how to locate, prepare for, then import metadata from a previous Greenstone collection. Is aware of such factors as the presence of Metadata Set files and hierarchy files. Updates the profiler where possible to allow for faster subsequent imports from a certain collection. Caches all the information about encountered collections in CollectCFG objects which are softly cached (i.e. they are cached, but are reclaimed before an OutOfMemory exception would be thrown).
 * @author John Thompson, Greenstone Digital Library, University of Waikato
 * @version 2.3
 */
58 | public class GreenstoneMetadataParser
|
---|
59 | extends LinkedHashMap
|
---|
60 | implements MetadataParser {
|
---|
61 |
|
---|
62 | static final private int MAX_CFG_CACHE_SIZE = 10;
|
---|
63 | static final private int MAX_GDM_CACHE_SIZE = 10;
|
---|
64 | /** The default name and location for a collection configuration file (presuming that a collection file prefix will be added). */
|
---|
65 | static final private String CONFIG_FILENAME = "etc" + File.separator + "collect.cfg";
|
---|
66 | /** The pattern to match when searching for directory level assignments. */
|
---|
67 | static final private String DIRECTORY_FILENAME = ".*";
|
---|
68 | static final private String DIRECTORY_FILENAME_SUFFIX = "/.*";
|
---|
69 | static final private String DESCRIPTION_ELEMENT = "Description";
|
---|
70 | static final private String FILENAME_ELEMENT = "FileName";
|
---|
71 | static final private String FILESET_ELEMENT = "FileSet";
|
---|
72 | /** The name of a gdm file. */
|
---|
73 | static final private String GIMPORT = "gimport";
|
---|
74 | static final private String IMPORT = "import";
|
---|
75 | static final private String METADATA_ELEMENT = "Metadata";
|
---|
76 | static final private String METADATA_XML_FILENAME = "metadata.xml";
|
---|
77 | static final private String MODE_ATTRIBUTE = "mode";
|
---|
78 | static final private String NAME_ATTRIBUTE = "name";
|
---|
79 | static final private String SEPARATOR = "/";
|
---|
80 |
|
---|
81 | /** A list of the collect.cfg paths that we should ignore. */
|
---|
82 | private ArrayList ignore_list = new ArrayList();
|
---|
83 | /** Has this process been cancelled. */
|
---|
84 | private boolean dialog_cancelled = false;
|
---|
85 | /** A cache of previously parsed collection configuration files. */
|
---|
86 | private CollectCFGCache cfg_cache = new CollectCFGCache();
|
---|
87 | /** A mapping from BasicMetadata to their fully enabled Metadata incarnation. */
|
---|
88 | private HashMap transform = new HashMap();
|
---|
89 |
|
---|
90 | /** Default constructor needed for dynamic class loading. */
|
---|
91 | public GreenstoneMetadataParser() {
|
---|
92 | }
|
---|
93 | /** Locate and import any metadata parsed by this metadata parser given the file involved and its previous incarnation. */
|
---|
94 | public boolean process(FileNode destination, FileNode origin, boolean folder_level, boolean dummy_run) {
|
---|
95 | ///atherer.println("GreenstoneMetadataParser: Process " + origin + ": ");
|
---|
96 | int counter = 0;
|
---|
97 | dialog_cancelled = false;
|
---|
98 |
|
---|
99 | // 1. Determine what collection the file is in, and load/parse the appropriate collect.cfg. Cache collect.cfg object.
|
---|
100 | ///ystem.err.print("1 ");
|
---|
101 | // Start at the origin node file. If its a file get its parent directory.
|
---|
102 | File collection_dir = origin.getFile();
|
---|
103 | if(collection_dir.isFile()) {
|
---|
104 | collection_dir = collection_dir.getParentFile();
|
---|
105 | }
|
---|
106 | // We're currently in the importing directory so we'll go one more step up.
|
---|
107 | collection_dir = collection_dir.getParentFile();
|
---|
108 | // We are looking for a directory which contains a etc/collect.cfg file and either an import or a gimport directory.
|
---|
109 | boolean found = false;
|
---|
110 | while(!found && collection_dir != null) {
|
---|
111 | File possible_cfg_file = new File(collection_dir, CONFIG_FILENAME);
|
---|
112 | File possible_gimport_directory = new File(collection_dir, GIMPORT);
|
---|
113 | File possible_import_directory = new File(collection_dir, IMPORT);
|
---|
114 | if(possible_cfg_file.exists() && (possible_gimport_directory.exists() || possible_import_directory.exists())) {
|
---|
115 | found = true;
|
---|
116 | ///ystem.err.println("Found greenstone collection at " + collection_dir.getAbsolutePath());
|
---|
117 | }
|
---|
118 | else {
|
---|
119 | collection_dir = collection_dir.getParentFile();
|
---|
120 | }
|
---|
121 | }
|
---|
122 |
|
---|
123 | // Now retrieve the configuration file if there is one.
|
---|
124 | CollectCFG collect_cfg = null;
|
---|
125 | if(collection_dir != null) {
|
---|
126 | File collect_cfg_file = new File(collection_dir, CONFIG_FILENAME);
|
---|
127 | if(collect_cfg_file.exists()) {
|
---|
128 | collect_cfg = cfg_cache.get(collect_cfg_file);
|
---|
129 | }
|
---|
130 | }
|
---|
131 |
|
---|
132 | // 2. If a collection configuration file was found, attempt to merge in any mdses and make note of those that are successfully imported (by removing reference from collect.cfg).
|
---|
133 | ///ystem.err.print("2 ");
|
---|
134 | if(collect_cfg != null) {
|
---|
135 | ArrayList mdses = collect_cfg.getMetadataSets();
|
---|
136 | for(int i = 0; i < mdses.size(); i++) {
|
---|
137 | File mds_file = (File) mdses.get(i);
|
---|
138 | Gatherer.c_man.getCollection().msm.importMDS(mds_file, false);
|
---|
139 | }
|
---|
140 | mdses.clear();
|
---|
141 | mdses = null;
|
---|
142 | }
|
---|
143 |
|
---|
144 | // 3. Locate all of the metadata.xml files that may have an affect on the origin file. Make sure the metadata.xml closest to the origin files directory is last (to ensure property inheritance regarding accumulate/overwrite).
|
---|
145 | ///ystem.err.print("3 ");
|
---|
146 | ArrayList search_files = new ArrayList();
|
---|
147 | File file = origin.getFile();
|
---|
148 | String filename = null;
|
---|
149 | boolean file_level;
|
---|
150 | if(file.isFile()) {
|
---|
151 | file_level = false;
|
---|
152 | filename = file.getName();
|
---|
153 | file = file.getParentFile();
|
---|
154 | }
|
---|
155 | else {
|
---|
156 | file_level = true;
|
---|
157 | }
|
---|
158 | while(file != null && (collection_dir == null || !file.equals(collection_dir))) {
|
---|
159 | File test_file = new File(file, Utility.METADATA_XML);
|
---|
160 | if(test_file.exists()) {
|
---|
161 | search_files.add(0, new MetadataXMLFileSearch(test_file, filename));
|
---|
162 | }
|
---|
163 | if(filename != null) {
|
---|
164 | filename = file.getName() + SEPARATOR + filename;
|
---|
165 | }
|
---|
166 | else {
|
---|
167 | filename = file.getName();
|
---|
168 | }
|
---|
169 | file = file.getParentFile();
|
---|
170 | }
|
---|
171 | filename = null;
|
---|
172 | file = null;
|
---|
173 | // Start with an initially empty ArrayList of metadata
|
---|
174 | ArrayList metadata = new ArrayList();
|
---|
175 | // Now search each of these metadata xml for metadata, remembering to accumulate or overwrite as we go along.
|
---|
176 | for(int i = 0; i < search_files.size(); i++) {
|
---|
177 | MetadataXMLFileSearch a_search = (MetadataXMLFileSearch) search_files.get(i);
|
---|
178 | ///ystem.err.println("Search " + a_search.file.getAbsolutePath() + " for " + (a_search.filename != null ? a_search.filename : ".*"));
|
---|
179 | // Retrieve the document
|
---|
180 | BasicGDMDocument document = getDocument(a_search.file);
|
---|
181 | if(document != null) {
|
---|
182 | // If this is a dummy run, our original source file is actually the metadata.xml file and we retrieve all metadata for this collection, as if accumulated!
|
---|
183 | if(dummy_run) {
|
---|
184 | metadata = document.getAllMetadata();
|
---|
185 | }
|
---|
186 | else {
|
---|
187 | metadata = document.getMetadata(a_search.filename, metadata, folder_level);
|
---|
188 | }
|
---|
189 | document = null;
|
---|
190 | }
|
---|
191 | a_search = null;
|
---|
192 | }
|
---|
193 | search_files = null;
|
---|
194 | // Finally assign the metadata
|
---|
195 | ///ystem.err.println("Found " + metadata.size() + " pieces of metadata for " + destination);
|
---|
196 | if(metadata.size() > 0) {
|
---|
197 | addMetadata(origin, destination, metadata, collection_dir, collect_cfg, dummy_run);
|
---|
198 | }
|
---|
199 | return dialog_cancelled;
|
---|
200 | }
|
---|
201 |
|
---|
202 | protected boolean removeEldestEntry(java.util.Map.Entry entry) {
|
---|
203 | return (size() > MAX_GDM_CACHE_SIZE);
|
---|
204 | }
|
---|
205 |
|
---|
206 | private void addMetadata(FileNode origin, FileNode destination, ArrayList metadata, File collection_dir, CollectCFG collect_cfg, boolean dummy_run) {
|
---|
207 | // before we try to addMetadata, we need to check that there are some metadata sets for the collection - otherwise we cant add or import
|
---|
208 | Vector meta_sets = Gatherer.c_man.getCollection().msm.getSets(false);
|
---|
209 | if (meta_sets.size()==0) {
|
---|
210 | ///ystem.out.println("GreenstoneMetadataParser:Error: we have been asked to add metadata but there are no existing sets");
|
---|
211 | // print the warning dialog
|
---|
212 | WarningDialog dialog = new WarningDialog("warning.MissingMDS", true);
|
---|
213 | if (dialog.display() == JOptionPane.CANCEL_OPTION) {
|
---|
214 | // the user has cancelled
|
---|
215 | dialog_cancelled = true;
|
---|
216 | }
|
---|
217 | return;
|
---|
218 | }
|
---|
219 | ///ystem.err.print("6 ");
|
---|
220 | // Used in a complicated test later on.
|
---|
221 | for(int i = 0; !dialog_cancelled && i < metadata.size(); i++) {
|
---|
222 | BasicMetadata basic_metadata = ((BasicMetadata) metadata.get(i)).copy();
|
---|
223 | BasicMetadata metadatum = (BasicMetadata) metadata.get(i);
|
---|
224 | metadatum.collection = collection_dir; // May be null. Doesn't matter.
|
---|
225 | Metadata final_metadata = null;
|
---|
226 | // If this BasicMetadata already exists in the transform cache then we can save ourselves a lot of work.
|
---|
227 | SoftReference reference = (SoftReference) transform.get(basic_metadata);
|
---|
228 | if(reference != null) {
|
---|
229 | final_metadata = (Metadata) reference.get();
|
---|
230 | }
|
---|
231 | if(final_metadata == null) {
|
---|
232 | ///ystem.err.println("No existing Metadata object for BasicMetadata: " + basic_metadata);
|
---|
233 | // 6a. Check if an hfile is associated with this metadata, and if so load it, cache it in the collection.cfg object, then resolve metadata value index. Of course we can only do this if a collection configuration file was found in the first place.
|
---|
234 | if(collect_cfg != null) {
|
---|
235 | HFile h_file = collect_cfg.getHFile(metadatum.element);
|
---|
236 | if(h_file != null && !dummy_run) {
|
---|
237 | ///ystem.err.print(metadata.value + " maps to ");
|
---|
238 | metadatum.value = h_file.getValue(metadatum.value);
|
---|
239 | ///ystem.err.println(metadatum.value);
|
---|
240 | }
|
---|
241 | h_file = null;
|
---|
242 | }
|
---|
243 | // 6b. Check if there is a profile regarding the current metadata. The profile may be stored for the collection directory, or if no such directory is available, then try the ancestor folders of the origin file.
|
---|
244 | ///ystem.err.println("Retrieve existing action: " + collection_dir.getAbsolutePath() + ", " + metadatum.element);
|
---|
245 | if(collection_dir != null) {
|
---|
246 | // Note that the first test is whether a profile action exist, while the 'getAction' can return null as the profile action.
|
---|
247 | if(Gatherer.c_man.getCollection().msm.profiler.containsAction(collection_dir.getAbsolutePath(), metadatum.element)) {
|
---|
248 | String new_element_name = Gatherer.c_man.getCollection().msm.profiler.getAction(collection_dir.getAbsolutePath(), metadatum.element);
|
---|
249 | ///ystem.err.println("Profile result = " + new_element_name);
|
---|
250 | if(new_element_name == null) {
|
---|
251 | metadatum = null;
|
---|
252 | }
|
---|
253 | else {
|
---|
254 | metadatum.element = new_element_name;
|
---|
255 | }
|
---|
256 | new_element_name = null;
|
---|
257 | }
|
---|
258 | }
|
---|
259 | else {
|
---|
260 | boolean found = false;
|
---|
261 | File current_folder = origin.getFile().getParentFile();
|
---|
262 | while(!found && metadatum != null && current_folder != null) {
|
---|
263 | if(Gatherer.c_man.getCollection().msm.profiler.containsAction(current_folder.getAbsolutePath(), metadatum.element)) {
|
---|
264 | found = true;
|
---|
265 | String new_element_name = Gatherer.c_man.getCollection().msm.profiler.getAction(current_folder.getAbsolutePath(), metadatum.element);
|
---|
266 | ///ystem.err.println("Profile result = " + new_element_name);
|
---|
267 | if(new_element_name == null) {
|
---|
268 | metadatum = null;
|
---|
269 | }
|
---|
270 | else {
|
---|
271 | metadatum.element = new_element_name;
|
---|
272 | }
|
---|
273 | new_element_name = null;
|
---|
274 | }
|
---|
275 | else {
|
---|
276 | current_folder = current_folder.getParentFile();
|
---|
277 | }
|
---|
278 | }
|
---|
279 | current_folder = null;
|
---|
280 | }
|
---|
281 | ///atherer.println("Assigning metadatum.");
|
---|
282 | if(metadatum != null) {
|
---|
283 | // 6c. Try to add metadata. If there is no matching metadata element:
|
---|
284 | ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(metadatum.element, true);
|
---|
285 | // Arg. The element returned may come from the Greenstone dls, which of course should never be involved during importing. To solve check the namespace isn't "" and if it is nullify the element. Nullify. NULLIFY, Bwuhahahaha...
|
---|
286 | if(element != null && element.getNamespace().equals("")) {
|
---|
287 | element = null;
|
---|
288 | }
|
---|
289 | // 6ci. If no match exists, prompt the user to add/merge with specific metadata element. The user can also choose to ignore this metadata.
|
---|
290 | if(element == null) {
|
---|
291 | element = selectElement(metadatum.element);
|
---|
292 | if(!dialog_cancelled) {
|
---|
293 | // 6ciii. If either of the above work, remember to add to profile.
|
---|
294 | if(element == null) {
|
---|
295 | ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadatum.element + ", null");
|
---|
296 | if(collection_dir != null) {
|
---|
297 | Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadatum.element, null);
|
---|
298 | }
|
---|
299 | else {
|
---|
300 | Gatherer.c_man.getCollection().msm.profiler.addAction(origin.getFile().getParentFile().getAbsolutePath(), metadatum.element, null);
|
---|
301 | }
|
---|
302 | }
|
---|
303 | else {
|
---|
304 | ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadatum.element + ", " + element.getName());
|
---|
305 | if(collection_dir != null) {
|
---|
306 | Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadatum.element, element.getName());
|
---|
307 | }
|
---|
308 | else {
|
---|
309 | Gatherer.c_man.getCollection().msm.profiler.addAction(origin.getFile().getParentFile().getAbsolutePath(), metadatum.element, element.getName());
|
---|
310 | }
|
---|
311 | }
|
---|
312 | }
|
---|
313 | }
|
---|
314 | // - Add metadata
|
---|
315 | if(!dummy_run && element != null && !dialog_cancelled) {
|
---|
316 | ///ystem.err.println("Retrieve the value tree for " + element.toString());
|
---|
317 | GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(element);
|
---|
318 | if(model != null) {
|
---|
319 | // One little 'fix' for importing from the demo or dls files. The Title metadata found in the metadata.xml isn't used in preference for the automatically extracted Titles. However we want to use them, so we should remove '.*(<filename>)$' for a certain file <filename>.
|
---|
320 | String raw_value = metadatum.value.trim();
|
---|
321 | String filename_munged = destination.getFile().getName();
|
---|
322 | int index = -1;
|
---|
323 | if((index = filename_munged.indexOf(".")) != -1) {
|
---|
324 | filename_munged = filename_munged.substring(0, index);
|
---|
325 | }
|
---|
326 | filename_munged = "(" + filename_munged + ")";
|
---|
327 | if(raw_value.endsWith(filename_munged)) {
|
---|
328 | raw_value = (raw_value.substring(0, raw_value.length() - filename_munged.length())).trim();
|
---|
329 | }
|
---|
330 | GValueNode node = model.addValue(raw_value);
|
---|
331 | final_metadata = new Metadata(element, node);
|
---|
332 | ///ystem.err.println("Adding final metadata: " + metadatum.toString());
|
---|
333 | node = null;
|
---|
334 | }
|
---|
335 | model = null;
|
---|
336 | }
|
---|
337 | element = null;
|
---|
338 | }
|
---|
339 | // If we have successfully created a Metadata from the BasicMetadata, store it
|
---|
340 | if(final_metadata != null && !dialog_cancelled) {
|
---|
341 | transform.put(basic_metadata, new SoftReference(final_metadata));
|
---|
342 | ///ystem.err.println("Add a Metadata object for BasicMetadata: " + basic_metadata);
|
---|
343 | }
|
---|
344 | }
|
---|
345 | else {
|
---|
346 | ///ystem.err.println("Found a Metadata object for BasicMetadata: " + basic_metadata);
|
---|
347 | }
|
---|
348 | if(!dummy_run && final_metadata != null && !dialog_cancelled) {
|
---|
349 | final_metadata.setAccumulate(metadatum.accumulates);
|
---|
350 | // Now we can finally add the metadata.
|
---|
351 | ///ystem.err.println("Adding Metadata: " + final_metadata);
|
---|
352 | Gatherer.c_man.getCollection().msm.fireMetadataChanged(0, destination, null, final_metadata);
|
---|
353 | }
|
---|
354 | // Otherwise there is no way to add this metadata. No value model no metadata value.
|
---|
355 | final_metadata = null;
|
---|
356 | metadatum = null;
|
---|
357 | }
|
---|
358 | }
|
---|
359 |
|
---|
360 | /** Determine the different suffix between two string.
|
---|
361 | * @param base_str The base <strong>String</strong>, expected to be the short of the two strings provided.
|
---|
362 | * @param target_str The target <strong>String</strong>, whose differing suffix is returned.
|
---|
363 | * @return A <strong>String</strong> containing the suffix from target which is different from base.
|
---|
364 | */
|
---|
365 | private String diff(String base_str, String target_str) {
|
---|
366 | StringTokenizer base_tokenizer = new StringTokenizer(base_str, File.separator);
|
---|
367 | StringTokenizer target_tokenizer = new StringTokenizer(target_str, File.separator);
|
---|
368 | String base = null;
|
---|
369 | String target = null;
|
---|
370 | while(base_tokenizer.hasMoreTokens() && (base = base_tokenizer.nextToken()).equals((target = target_tokenizer.nextToken()))) {
|
---|
371 | }
|
---|
372 | StringBuffer result = new StringBuffer(target);
|
---|
373 | while(target_tokenizer.hasMoreTokens()) {
|
---|
374 | result.append(File.separator);
|
---|
375 | result.append(target_tokenizer.nextToken());
|
---|
376 | }
|
---|
377 | return result.toString();
|
---|
378 | }
|
---|
379 |
|
---|
380 | /** Retrieve the BasicGDMDocument found at the given file, or null if there is no such file or if it isn't a valid BasicGDMDocument. */
|
---|
381 | private BasicGDMDocument getDocument(File file) {
|
---|
382 | ///ystem.err.println("Get Document at: " + file.getAbsolutePath());
|
---|
383 | BasicGDMDocument document = null;
|
---|
384 | if(!ignore_list.contains(file) && file.exists()) {
|
---|
385 | // Check cache
|
---|
386 | SoftReference reference = (SoftReference) get(file);
|
---|
387 | if(reference != null) {
|
---|
388 | ///ystem.err.println("Hit!!");
|
---|
389 | document = (BasicGDMDocument) reference.get();
|
---|
390 | reference = null;
|
---|
391 | }
|
---|
392 | // If that didn't work try to parse in the document
|
---|
393 | if(document == null) {
|
---|
394 | ///ystem.err.println("Miss or stale reference.");
|
---|
395 | document = new BasicGDMDocument(file);
|
---|
396 | if(document.isValid()) {
|
---|
397 | put(file, new SoftReference(document));
|
---|
398 | }
|
---|
399 | else {
|
---|
400 | ///ystem.err.println(file.getAbsolutePath() + " is not a valid GDM XML file.");
|
---|
401 | ignore_list.add(file);
|
---|
402 | document = null;
|
---|
403 | }
|
---|
404 | }
|
---|
405 | }
|
---|
406 | else {
|
---|
407 | ///ystem.err.println("Ignoring file or file doesn't exists.");
|
---|
408 | }
|
---|
409 | return document;
|
---|
410 | }
|
---|
411 |
|
---|
412 |
|
---|
413 | /** Display a prompt allowing a user to select a metadata element to attempt to force add/merge or ignore a metadata element to. For instance an old version of a metadata.xml from the DLS collection might have an assigned metadata value "Publisher=EC Courier", however Publisher won't automatically match to any metadata set. This prompt will be displayed, and some effort will be made to systematically locate the appropriate set. In this case this should be the DLS metadata set as dls.Publisher should be the closest match. Regardless the element selected is returned.
|
---|
414 | * @param element_name The name of the element we are trying to add, as a <strong>String</strong>.
|
---|
415 | * @return The <strong>ElementWrapper</strong> choosen by the user, or <i>null</i> to skip this metadata element.
|
---|
416 | */
|
---|
417 | private ElementWrapper selectElement(String element_name) {
|
---|
418 | ElementWrapper result = Gatherer.c_man.getCollection().msm.prompt.selectElement(element_name);
|
---|
419 | dialog_cancelled = Gatherer.c_man.getCollection().msm.prompt.wasDialogCancelled();
|
---|
420 | return result;
|
---|
421 | }
|
---|
422 |
|
---|
423 | /** A 'basic' version of the more complete GDMDocument used elsewhere, this object provides the same functionality except that it doesn't use Metadata objects. These objects require live references to elements within the MetadataSetManager and GValueModels, but these may not yet exist (and indeed may never exist) for metadata parsed from metadata.xml's outside of our current collection. Thus this class returns a String (or an ArrayList of Strings) when asked for the metadata associated with a certain file. Also notice that this class provides no constructor method for creating a blank document, nor does it ever need a reference to the Gatherer.*/
|
---|
424 | private class BasicGDMDocument
|
---|
425 | extends HashMap {
|
---|
426 | /** The document this class sources its data from. */
|
---|
427 | private Document base_document;
|
---|
428 | /** This constructor takes the original document and parsed out and stores metadata with its association to filenames. */
|
---|
429 | public BasicGDMDocument(File file) {
|
---|
430 | ///ystem.err.println("New BasicGDMDocument: " + file.getAbsolutePath());
|
---|
431 | base_document = Utility.parse(file.getAbsolutePath(), false);
|
---|
432 | }
|
---|
433 | /** Retrieve all of the metadata in this file. */
|
---|
434 | public ArrayList getAllMetadata() {
|
---|
435 | ArrayList metadatum = new ArrayList();
|
---|
436 | // Don't search the cache as this would never have been added.
|
---|
437 | try {
|
---|
438 | // Retrieve the document element.
|
---|
439 | Element directorymetadata_element = base_document.getDocumentElement();
|
---|
440 | // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching.
|
---|
441 | NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
|
---|
442 | for(int i = 0; i < fileset_elements.getLength(); i++) {
|
---|
443 | Element fileset_element = (Element) fileset_elements.item(i);
|
---|
444 | NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
|
---|
445 | for(int j = 0; j < filename_elements.getLength(); j++) {
|
---|
446 | Element filename_element = (Element) filename_elements.item(j);
|
---|
447 | // If they match add all of the metadata found in the Description child element, overwriting any metadata with the same element
|
---|
448 | NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
|
---|
449 | for(int k = 0; k < description_elements.getLength(); k++) {
|
---|
450 | Element description_element = (Element) description_elements.item(k);
|
---|
451 | NodeList metadata_elements = description_element.getElementsByTagName(METADATA_ELEMENT);
|
---|
452 | for(int l = 0; l < metadata_elements.getLength(); l++) {
|
---|
453 | Element metadata_element = (Element) metadata_elements.item(l);
|
---|
454 | String element = metadata_element.getAttribute(NAME_ATTRIBUTE);
|
---|
455 | BasicMetadata metadata = new BasicMetadata(element, Utility.METADATA_XML, true);
|
---|
456 | // Remove any previous values for this metadata element.
|
---|
457 | for(int m = metadatum.size() - 1; m >= 0; m--) {
|
---|
458 | BasicMetadata old_metadata = (BasicMetadata) metadatum.get(m);
|
---|
459 | if(old_metadata.element.equals(element)) {
|
---|
460 | metadatum.remove(m);
|
---|
461 | }
|
---|
462 | old_metadata = null;
|
---|
463 | }
|
---|
464 | // Add the completed metadata and clean up
|
---|
465 | metadatum.add(metadata);
|
---|
466 | metadata = null;
|
---|
467 | element = null;
|
---|
468 | metadata_element = null;
|
---|
469 | }
|
---|
470 | metadata_elements = null;
|
---|
471 | description_element = null;
|
---|
472 | }
|
---|
473 | description_elements = null;
|
---|
474 | filename_element = null;
|
---|
475 | }
|
---|
476 | filename_elements = null;
|
---|
477 | fileset_element = null;
|
---|
478 | }
|
---|
479 | fileset_elements = null;
|
---|
480 | directorymetadata_element = null;
|
---|
481 | }
|
---|
482 | catch (Exception error) {
|
---|
483 | Gatherer.self.printStackTrace(error);
|
---|
484 | }
|
---|
485 | return metadatum;
|
---|
486 | }
|
---|
487 |
|
---|
488 | /** Retrieve any metadata associated with a certain file. If filename is null we are attempting to find directory level metadata. */
|
---|
489 | public ArrayList getMetadata(String filename, ArrayList metadatum_so_far, boolean folder_level) {
|
---|
490 | ///ystem.err.println("Retrieving metadata for: " + filename + " [" + folder_level + "]");
|
---|
491 | ArrayList metadatum = null;
|
---|
492 | // We start by attempting to retrieve this metadata from the cache.
|
---|
493 | if(filename != null) {
|
---|
494 | metadatum = (ArrayList) get(filename);
|
---|
495 | }
|
---|
496 | else {
|
---|
497 | metadatum = (ArrayList) get(DIRECTORY_FILENAME);
|
---|
498 | }
|
---|
499 | // If that failed we consult the document for metadata.
|
---|
500 | if(metadatum == null) {
|
---|
501 | metadatum = new ArrayList();
|
---|
502 | if(metadatum_so_far == null) {
|
---|
503 | metadatum = new ArrayList();
|
---|
504 | }
|
---|
505 | else {
|
---|
506 | metadatum = metadatum_so_far;
|
---|
507 | }
|
---|
508 | try {
|
---|
509 | // Retrieve the document element.
|
---|
510 | Element directorymetadata_element = base_document.getDocumentElement();
|
---|
511 | // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching.
|
---|
512 | NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
|
---|
513 | for(int i = 0; i < fileset_elements.getLength(); i++) {
|
---|
514 | Element fileset_element = (Element) fileset_elements.item(i);
|
---|
515 | NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
|
---|
516 | for(int j = 0; j < filename_elements.getLength(); j++) {
|
---|
517 | Element filename_element = (Element) filename_elements.item(j);
|
---|
518 | String filename_text = MSMUtils.getValue(filename_element);
|
---|
519 | if(isMatchingFileSet(filename, filename_text, folder_level)) {
|
---|
520 | ///ystem.err.println("Match: " + (filename != null ? filename : ".*") + " => " + filename_text);
|
---|
521 | // If they match add all of the metadata found in the Description child element, remembering to abide by desired mode (accumulate vs. overwrite).
|
---|
522 | NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT);
|
---|
523 | for(int k = 0; k < description_elements.getLength(); k++) {
|
---|
524 | Element description_element = (Element) description_elements.item(k);
|
---|
525 | NodeList metadata_elements = description_element.getElementsByTagName(METADATA_ELEMENT);
|
---|
526 | for(int l = 0; l < metadata_elements.getLength(); l++) {
|
---|
527 | Element metadata_element = (Element) metadata_elements.item(l);
|
---|
528 | String element = metadata_element.getAttribute(NAME_ATTRIBUTE);
|
---|
529 | ///ystem.err.println("Found element: " + element);
|
---|
530 | //String language = metadata_element.getAttribute("language");
|
---|
531 | String mode = metadata_element.getAttribute(MODE_ATTRIBUTE);
|
---|
532 | // Add the new metadata to our list of metadata for this target file.
|
---|
533 | String value = Utility.stripNL(MSMUtils.getValue(metadata_element));
|
---|
534 | ///ystem.err.println("Found value: " + element);
|
---|
535 | BasicMetadata metadata = new BasicMetadata(element, value, mode.equals("accumulate"));
|
---|
536 | // If mode is overwrite, then remove any previous values for this metadata element.
|
---|
537 | if(!metadata.accumulates) {
|
---|
538 | for(int m = metadatum.size() - 1; m >= 0; m--) {
|
---|
539 | BasicMetadata old_metadata = (BasicMetadata) metadatum.get(m);
|
---|
540 | if(old_metadata.element.equals(element)) {
|
---|
541 | metadatum.remove(m);
|
---|
542 | }
|
---|
543 | old_metadata = null;
|
---|
544 | }
|
---|
545 | }
|
---|
546 | mode = null;
|
---|
547 |
|
---|
548 | // Add the completed metadata and clean up
|
---|
549 | metadatum.add(metadata);
|
---|
550 | metadata = null;
|
---|
551 | value = null;
|
---|
552 | element = null;
|
---|
553 | metadata_element = null;
|
---|
554 | }
|
---|
555 | metadata_elements = null;
|
---|
556 | description_element = null;
|
---|
557 | }
|
---|
558 | description_elements = null;
|
---|
559 | }
|
---|
560 | else {
|
---|
561 | ///ystem.err.println("No Match!");
|
---|
562 | }
|
---|
563 | filename_text = null;
|
---|
564 | filename_element = null;
|
---|
565 | }
|
---|
566 | filename_elements = null;
|
---|
567 | fileset_element = null;
|
---|
568 | }
|
---|
569 | fileset_elements = null;
|
---|
570 | directorymetadata_element = null;
|
---|
571 | }
|
---|
572 | catch (Exception error) {
|
---|
573 | Gatherer.self.printStackTrace(error);
|
---|
574 | }
|
---|
575 | // Cache the result, given that these external metadata.xmls are taken to be static at the time of reading (if you happen to be sourcing information from a opened collection that someone is working on, too bad.
|
---|
576 | if(filename != null) {
|
---|
577 | put(filename, metadatum);
|
---|
578 | }
|
---|
579 | else {
|
---|
580 | put(DIRECTORY_FILENAME, metadatum);
|
---|
581 | }
|
---|
582 | }
|
---|
583 | return metadatum;
|
---|
584 | }
|
---|
585 |
|
---|
586 | private boolean isMatchingFileSet(String filename, String filename_text, boolean folder_level) {
|
---|
587 | // Crappy. There are apparently two ways of assigning, say, directory level metadata to anything in the ac01ne directory from a parent directories metadata.xml.
|
---|
588 | // The developers guide way: ac01ne/.*
|
---|
589 | // The dls way: ac01ne
|
---|
590 | // So the three tests are:
|
---|
591 | // 1. Check for an exact match i.e "ac01ne/ac01ne.htm" matches "ac01ne/".*
|
---|
592 | // 2. Check for a parent folder match, in the absence of further pattern i.e "ac01ne/ac01ne.htm" matches "ac01ne"
|
---|
593 | // 3. Check for a folder level match if thats what we are looking for i.e "null" matches ".*"
|
---|
594 |
|
---|
595 | ///ystem.err.println("Check for: " + (filename != null ? filename : ".*"));
|
---|
596 | ///ystem.err.println("Folder level = " + folder_level);
|
---|
597 | ///ystem.err.println("filename != null && '" + filename + "'.matches('" + filename_text + "') = " + (filename != null ? filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME) : false));
|
---|
598 | ///ystem.err.println("filename != null && '" + filename + "'.matches('" + filename_text + DIRECTORY_FILENAME_SUFFIX + "') [folder level = " + folder_level + "] = " + (filename != null ? filename.matches(filename_text + DIRECTORY_FILENAME_SUFFIX) && folder_level: false));
|
---|
599 | ///ystem.err.println("filename == null && '" + filename_text + "'.equals('.*') = " + (filename == null ? filename_text.equals(DIRECTORY_FILENAME) : false));
|
---|
600 | if (filename != null) {
|
---|
601 | if(folder_level) {
|
---|
602 | return filename.matches(filename_text) || filename.matches(filename_text + DIRECTORY_FILENAME_SUFFIX);
|
---|
603 | }
|
---|
604 | else {
|
---|
605 | return filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME);
|
---|
606 | }
|
---|
607 | }
|
---|
608 | else {
|
---|
609 | return filename_text.equals(DIRECTORY_FILENAME);
|
---|
610 | }
|
---|
611 | }
|
---|
612 |
|
---|
613 | /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */
|
---|
614 | public boolean isValid() {
|
---|
615 | // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata.
|
---|
616 | String doctype_name = base_document.getDoctype().getName();
|
---|
617 | String root_name = base_document.getDocumentElement().getTagName();
|
---|
618 | return ((doctype_name.equals("DirectoryMetadata") || doctype_name.equals("GreenstoneDirectoryMetadata")) && (root_name.equals("DirectoryMetadata") || root_name.equals("GreenstoneDirectoryMetadata")));
|
---|
619 | }
|
---|
620 |
|
---|
621 | /** Decode a string that was previously made Perl safe.
|
---|
622 | * @param safe The encoded <strong>String</strong> where dangerous characters have been escaped.
|
---|
623 | * @return A <strong>String</strong> with all the escaping removed.
|
---|
624 | */
|
---|
625 | private String decode(String safe) {
|
---|
626 | String dangerous = safe.replaceAll("\\\\.",".");
|
---|
627 | return dangerous;
|
---|
628 | }
|
---|
629 | }
|
---|
630 | /** A simplistic version of metadata, with no live references. */
|
---|
631 | private class BasicMetadata
|
---|
632 | implements Comparable {
|
---|
633 | public boolean accumulates;
|
---|
634 | /** The collection this metadata was extracted from. Important when attempting to map BasicMetadata to its Metadata incarnation. */
|
---|
635 | public File collection;
|
---|
636 | /** The metadata element. */
|
---|
637 | public String element = null;
|
---|
638 | /** The value. */
|
---|
639 | public String value = null;
|
---|
640 | /** Constructor takes initial values for element and value.
|
---|
641 | * @param element The metadata element as a <strong>String</strong>.
|
---|
642 | * @param value The value as a <strong>String</strong>.
|
---|
643 | */
|
---|
644 | public BasicMetadata(String element, String value, boolean accumulates) {
|
---|
645 | this.accumulates = accumulates;
|
---|
646 | this.element = element;
|
---|
647 | this.value = value;
|
---|
648 | }
|
---|
649 |
|
---|
650 | public BasicMetadata copy() {
|
---|
651 | return new BasicMetadata(element, value, accumulates);
|
---|
652 | }
|
---|
653 |
|
---|
654 | public int compareTo(Object other) {
|
---|
655 | return toString().compareTo(other.toString());
|
---|
656 | }
|
---|
657 | /** Compare two BasicMetadata objects for equality.
|
---|
658 | * @param object The other <strong>Object</strong>.
|
---|
659 | * @return <i>true</i> if this BasicMetadata matches the given object, <i>false</i> otherwise.
|
---|
660 | */
|
---|
661 | public boolean equals(Object object) {
|
---|
662 | BasicMetadata other = (BasicMetadata) object;
|
---|
663 | if(collection != null && other.collection != null) {
|
---|
664 | return (collection.equals(other.collection) && element.equals(other.element) && value.equals(other.value));
|
---|
665 | }
|
---|
666 | return (element.equals(other.element) && value.equals(other.value));
|
---|
667 | }
|
---|
668 | public String toString() {
|
---|
669 | return element + " = " + value;
|
---|
670 | }
|
---|
671 | }
|
---|
672 |
|
---|
673 | /** This class provides a cache for the instances of parsed collect.cfg files and their associated data. Assures that the most recently cached CollectCFG will remain available. Older objects are maintained as soft references and are freed at the JVM implementations descretion, but are gareunteed to be garbage collected before an OutOfMemory exception is thrown. */
|
---|
674 | private class CollectCFGCache
|
---|
675 | extends LinkedHashMap {
|
---|
676 | /** Retrieve the CollectCFG object that matches the given collection file path.
|
---|
677 | * @param collection_file The <strong>File</strong> that references the collection's directory.
|
---|
678 | * @return The <strong>CollectCFG</strong> that belongs to this collection, or <i>null</i> if no such file exists (so we probably aren't in a collection!).
|
---|
679 | */
|
---|
680 | public CollectCFG get(File collect_cfg_file) {
|
---|
681 | ///ystem.err.println("Retrieve the collection configuration file at: " + collect_cfg_file);
|
---|
682 | CollectCFG collect_cfg = null;
|
---|
683 | // Attempt to load from cache.
|
---|
684 | SoftReference reference = (SoftReference) super.get(collect_cfg_file);
|
---|
685 | // If is doesn't exist, either because its never been loaded, or thats its cache reference has gone stale, attempt to load it again.
|
---|
686 | if(reference == null || (collect_cfg = (CollectCFG)reference.get()) == null) {
|
---|
687 | try {
|
---|
688 | collect_cfg = new CollectCFG(collect_cfg_file);
|
---|
689 | put(collect_cfg_file, new SoftReference(collect_cfg));
|
---|
690 | }
|
---|
691 | catch(Exception error) {
|
---|
692 | Gatherer.printStackTrace(error);
|
---|
693 | collect_cfg = null;
|
---|
694 | }
|
---|
695 | }
|
---|
696 | return collect_cfg;
|
---|
697 | }
|
---|
698 |
|
---|
699 | protected boolean removeEldestEntry(java.util.Map.Entry entry) {
|
---|
700 | return (size() > MAX_CFG_CACHE_SIZE);
|
---|
701 | }
|
---|
702 | }
|
---|
703 |
|
---|
704 | /** The CollectCFG object encapsulates important metadata information extracted from a collect.cfg file, such as required metadata sets, and hfile associations. As the former are merged, their references are removed from this object, whereas the for the later references are replaced a representation of the hfile itself. */
|
---|
705 | private class CollectCFG {
|
---|
706 | /** A list of the metadata sets associated with the collect.cfg file. */
|
---|
707 | private ArrayList metadatasets = null;
|
---|
708 | /** A hash mapping from metadata element name to hierarchy file, or possibly hierarchy object. */
|
---|
709 | private HashMap hfiles = null;
|
---|
710 | /** The token at the start of a classify command line within the collect.cfg. */
|
---|
711 | static final private String CLASSIFY_COMMAND = "classify";
|
---|
712 | /** The token at the start of a metadataset command line within the collect.cfg. */
|
---|
713 | static final private String METADATASET_COMMAND = "metadataset";
|
---|
714 | /** Constructor which takes a file assumed to be the location of a collect.cfg file belonging to a Greenstone Collection.
|
---|
715 | * @param file A <strong>File</strong> referencing a collect.cfg file.
|
---|
716 | */
|
---|
717 | public CollectCFG(File file)
|
---|
718 | throws Exception {
|
---|
719 | ///atherer.println("Loading a new collection configuration file: " + file.getAbsolutePath());
|
---|
720 | File etc_directory = file.getParentFile();
|
---|
721 | hfiles = new HashMap();
|
---|
722 | metadatasets = new ArrayList();
|
---|
723 | FileReader reader = new FileReader(file);
|
---|
724 | BufferedReader in = new BufferedReader(reader);
|
---|
725 | String command = null;
|
---|
726 | while((command = in.readLine()) != null) {
|
---|
727 | CommandTokenizer tokenizer = new CommandTokenizer(command);
|
---|
728 | if(tokenizer.hasMoreTokens()) {
|
---|
729 | String token = tokenizer.nextToken().toLowerCase();
|
---|
730 | if(token.equals(METADATASET_COMMAND)) {
|
---|
731 | String family_name = tokenizer.nextToken();
|
---|
732 | String file_str = tokenizer.nextToken();
|
---|
733 | if(file_str.startsWith("\"") && file_str.endsWith("\"") && !file_str.equals("\"\"")) {
|
---|
734 | file_str = file_str.substring(1, file_str.length() - 1);
|
---|
735 | }
|
---|
736 | // If the file str is -only- the filename then we add <col_dir>/metadata/
|
---|
737 | File mds_file = null;
|
---|
738 | if(file_str.indexOf(File.separator) == -1) {
|
---|
739 | mds_file = new File(file.getParentFile().getParentFile(), File.separator + "metadata" + File.separator + file_str);
|
---|
740 | }
|
---|
741 | else {
|
---|
742 | mds_file = new File(file_str);
|
---|
743 | }
|
---|
744 | ///ystem.err.println("Attempting to file mds file at " + file.getAbsolutePath());
|
---|
745 | if(mds_file.exists()) {
|
---|
746 | metadatasets.add(mds_file);
|
---|
747 | }
|
---|
748 | mds_file = null;
|
---|
749 | file_str = null;
|
---|
750 | family_name = null;
|
---|
751 | }
|
---|
752 | // Also look for any classify commands that include an hfile and element
|
---|
753 | else if(token.equals(CLASSIFY_COMMAND)) {
|
---|
754 | String hfile_name = null;
|
---|
755 | String element_name = null;
|
---|
756 | // Drop the classifier name
|
---|
757 | tokenizer.nextToken();
|
---|
758 | while(tokenizer.hasMoreTokens()) {
|
---|
759 | token = tokenizer.nextToken().toLowerCase();
|
---|
760 | if(token.equals("-hfile")) {
|
---|
761 | hfile_name = tokenizer.nextToken();
|
---|
762 | }
|
---|
763 | else if(token.equals("-metadata")) {
|
---|
764 | element_name = tokenizer.nextToken();
|
---|
765 | }
|
---|
766 | }
|
---|
767 | if(hfile_name != null && element_name != null) {
|
---|
768 | // If hfile_name has no path, append the etc directories one. Either way create a file reference
|
---|
769 | File hfile = null;
|
---|
770 | hfile_name = hfile_name.replace('\\', File.separatorChar);
|
---|
771 | hfile_name = hfile_name.replace('/', File.separatorChar);
|
---|
772 | if(hfile_name.indexOf(File.separator) == -1) {
|
---|
773 | hfile = new File(etc_directory, hfile_name);
|
---|
774 | }
|
---|
775 | else {
|
---|
776 | hfile = new File(hfile_name);
|
---|
777 | }
|
---|
778 | // Add to hfiles
|
---|
779 | ///atherer.println("Adding hfile reference: " + element_name + " -> " + hfile);
|
---|
780 | hfiles.put(element_name, hfile);
|
---|
781 | hfile = null;
|
---|
782 | }
|
---|
783 | element_name = null;
|
---|
784 | hfile_name = null;
|
---|
785 | }
|
---|
786 | tokenizer = null;
|
---|
787 | }
|
---|
788 | }
|
---|
789 | command = null;
|
---|
790 | in.close();
|
---|
791 | reader.close();
|
---|
792 | in = null;
|
---|
793 | reader = null;
|
---|
794 | // Now we search the etc directory for *.txt files which we attempt to parse as hfiles
|
---|
795 | File children[] = etc_directory.listFiles(); // We are sure there is at least one, collect.cfg
|
---|
796 | for(int i = 0; i < children.length; i++) {
|
---|
797 | // If this is a text file, extract the element name and process
|
---|
798 | String name = children[i].getName();
|
---|
799 | if(children[i].isFile() && name.endsWith(".txt")) {
|
---|
800 | String element_name = name.substring(0, name.lastIndexOf("."));
|
---|
801 | if(!hfiles.containsKey(element_name)) {
|
---|
802 | ///atherer.println("Adding hfile reference: " + element_name + " -> " + children[i]);
|
---|
803 | hfiles.put(element_name, children[i]);
|
---|
804 | }
|
---|
805 | element_name = null;
|
---|
806 | }
|
---|
807 | name = null;
|
---|
808 | }
|
---|
809 | children = null;
|
---|
810 | etc_directory = null;
|
---|
811 | file = null;
|
---|
812 | }
|
---|
813 | /** Attempts to retrieve the HFile object associated with a certain metadata element. This may have already been cached, or may need to be loaded. Then again it may not even be necessary.
|
---|
814 | * @param element The fully qualified name of a metadata element, as a <strong>String</strong>.
|
---|
815 | * @return The <strong>HFile</strong> associated with the given element, or <i>null</i> if its unnecessary.
|
---|
816 | * @see org.greenstone.gatherer.cdm.CommandTokenizer
|
---|
817 | */
|
---|
818 | public HFile getHFile(String element) {
|
---|
819 | HFile result = null;
|
---|
820 | Object target = hfiles.get(element);
|
---|
821 | // If target is non-null
|
---|
822 | if(target != null) {
|
---|
823 | // If we haven't already load and parse the file.
|
---|
824 | if(target instanceof File) {
|
---|
825 | ///ystem.err.println("\nHFILE-MISS!! Loading " + target.toString());
|
---|
826 | result = new HFile();
|
---|
827 | try {
|
---|
828 | FileReader in_filereader = new FileReader((File)target);
|
---|
829 | //DecodeHTMLReader in_decodehtmlreader = new DecodeHTMLReader(in_filereader);
|
---|
830 | BufferedReader in = new BufferedReader(in_filereader);
|
---|
831 | String line = null;
|
---|
832 | while((line = in.readLine()) != null) {
|
---|
833 | CommandTokenizer tokenizer = new CommandTokenizer(line);
|
---|
834 | String alias = Utility.decodeGreenstone(tokenizer.nextToken());
|
---|
835 | String index = tokenizer.nextToken();
|
---|
836 | String value = Utility.decodeGreenstone(tokenizer.nextToken());
|
---|
837 | ///ystem.err.println("Read " + index + ", " + alias + ", " + value);
|
---|
838 | if(alias.startsWith("\"") && alias.endsWith("\"") && !alias.equals("\"\"")) {
|
---|
839 | alias = alias.substring(1, alias.length() - 1);
|
---|
840 | }
|
---|
841 | if(value.startsWith("\"") && value.endsWith("\"") && !value.equals("\"\"")) {
|
---|
842 | value = value.substring(1, value.length() - 1);
|
---|
843 | }
|
---|
844 | result.add(index, alias, value);
|
---|
845 | value = null;
|
---|
846 | alias = null;
|
---|
847 | index = null;
|
---|
848 | tokenizer = null;
|
---|
849 | }
|
---|
850 | line = null;
|
---|
851 | in.close();
|
---|
852 | in = null;
|
---|
853 | //in_decodehtmlreader = null;
|
---|
854 | in_filereader = null;
|
---|
855 | hfiles.put(element, result);
|
---|
856 | }
|
---|
857 | catch (Exception error) {
|
---|
858 | error.printStackTrace();
|
---|
859 | hfiles.remove(element);
|
---|
860 | }
|
---|
861 | }
|
---|
862 | else {
|
---|
863 | ///ystem.err.print("HFILE-HIT!!! ");
|
---|
864 | result = (HFile) target;
|
---|
865 | }
|
---|
866 | }
|
---|
867 | // Else no hfile is needed for this element
|
---|
868 | target = null;
|
---|
869 | return result;
|
---|
870 | }
|
---|
871 | /** Retrieve the list of metadata sets associated with this collection.
|
---|
872 | * @return An <strong>ArrayList</strong> of metadata set Files.
|
---|
873 | */
|
---|
874 | public ArrayList getMetadataSets() {
|
---|
875 | return metadatasets;
|
---|
876 | }
|
---|
877 | }
|
---|
878 |
|
---|
879 | /** The HFile object provides a container for the mappings from indexes, of the form 1.1.1, to alias-value pairs. It also provides method to retrieving the alias and value for a certain element, remembering that values must be expressed in terms of their absolute subject heirarchy path. */
|
---|
880 | private class HFile
|
---|
881 | extends HashMap {
|
---|
882 | /** Construct a new HFile object with no initial values. */
|
---|
883 | public HFile() {
|
---|
884 | super();
|
---|
885 | }
|
---|
886 | /** Add a new (index,(alias, value)) mapping.
|
---|
887 | * @param index The index of this mapping as a <strong>String</strong>.
|
---|
888 | * @param alias The alias of this mapping as a <strong>String</strong>.
|
---|
889 | * @param value And finally the value of this mapping as a, you guessed it, <strong>String</strong>.
|
---|
890 | */
|
---|
891 | public void add(String index, String alias, String value) {
|
---|
892 | Entry entry = new Entry(index, alias, value);
|
---|
893 | ///ystem.err.println("Adding entry: " + index + " \"" + alias + "\" \"" + value + "\"");
|
---|
894 | put(index, entry);
|
---|
895 | put(alias, entry);
|
---|
896 | }
|
---|
897 | public String getAlias(String index) {
|
---|
898 | String alias = "";
|
---|
899 | Entry entry = (Entry) get(index);
|
---|
900 | if(entry != null) {
|
---|
901 | alias = entry.alias;
|
---|
902 | }
|
---|
903 | entry = null;
|
---|
904 | return alias;
|
---|
905 | }
|
---|
906 | /** Retrieve the value associated with a certain index. This is harder than it first sounds as you must take into account the parent indexes of this one.
|
---|
907 | * @param index The index whose value you wish to calculate, as a <strong>String</strong>.
|
---|
908 | * @return The fully quantified path to the value that matches index, also as a <strong>String</strong>. Delimitiation between subject layers is denoted by the string "\\"
|
---|
909 | */
|
---|
910 | public String getValue(String index) {
|
---|
911 | ///ystem.err.println("Retrieve value for the alias/index: '" + index + "'");
|
---|
912 | StringBuffer value = new StringBuffer("");
|
---|
913 | // If index isn't the index, it must be the alias. Replace it with the index dammit.
|
---|
914 | Entry entry = null;
|
---|
915 | if(!Utility.isIndex(index)) {
|
---|
916 | ///ystem.err.println("\tThis is an alias.");
|
---|
917 | // Store this for later, as its exactly the same entry we'd get had we found the last component of a proper index.
|
---|
918 | entry = (Entry) get(index);
|
---|
919 | index = entry.index;
|
---|
920 | ///ystem.err.println("\tIndex is actually: " + index);
|
---|
921 | }
|
---|
922 | // Now build the hierarchy if necessary.
|
---|
923 | int dot_index = -1;
|
---|
924 | if((dot_index = index.indexOf(".")) != -1) {
|
---|
925 | ///ystem.err.println("\tHierarchy information required -->");
|
---|
926 | value.append(getValue(index.substring(0, dot_index)));
|
---|
927 | value.append(StaticStrings.ESCAPE_STR + StaticStrings.ESCAPE_STR);
|
---|
928 | ///ystem.err.println("\t<-- Hierarchy information complete");
|
---|
929 | }
|
---|
930 | if(entry == null) {
|
---|
931 | entry = (Entry) get(index);
|
---|
932 | }
|
---|
933 | if(entry != null) {
|
---|
934 | value.append(entry.value);
|
---|
935 | }
|
---|
936 | entry = null;
|
---|
937 | ///ystem.err.println("\tFinal value is: '" + value.toString() + "'\n");
|
---|
938 | return value.toString();
|
---|
939 | }
|
---|
940 |
|
---|
941 | private class Entry {
|
---|
942 | public String alias = null;
|
---|
943 | public String index = null;
|
---|
944 | public String value = null;
|
---|
945 | public Entry(String index, String alias, String value) {
|
---|
946 | this.alias = alias;
|
---|
947 | this.index = index;
|
---|
948 | this.value = value;
|
---|
949 | }
|
---|
950 | }
|
---|
951 | }
|
---|
952 |
|
---|
953 | private class MetadataXMLFileSearch {
|
---|
954 | public File file;
|
---|
955 | public String filename;
|
---|
956 | public MetadataXMLFileSearch(File file, String filename) {
|
---|
957 | this.file = file;
|
---|
958 | this.filename = filename;
|
---|
959 | }
|
---|
960 | }
|
---|
961 | }
|
---|