source: trunk/gli/src/org/greenstone/gatherer/msm/MetadataXMLFileManager.java@ 8088

Last change on this file since 8088 was 8088, checked in by mdewsnip, 20 years ago

Fixed a bug where assigned metadata would be stuffed up on Windows.

  • Property svn:keywords set to Author Date Id Revision
File size: 23.5 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.msm;
38
39import java.awt.*;
40import java.awt.event.*;
41import java.io.*;
42import java.util.*;
43import javax.swing.*;
44import javax.swing.event.*;
45import javax.swing.tree.*;
46import org.greenstone.gatherer.Gatherer;
47import org.greenstone.gatherer.file.FileNode;
48import org.greenstone.gatherer.gui.GLIButton;
49import org.greenstone.gatherer.gui.MetaEditPrompt;
50import org.greenstone.gatherer.msm.ElementWrapper;
51import org.greenstone.gatherer.msm.MetadataXMLFile;
52import org.greenstone.gatherer.msm.MetadataXMLFileParser;
53import org.greenstone.gatherer.msm.Metadata;
54import org.greenstone.gatherer.msm.MSMEvent;
55import org.greenstone.gatherer.msm.MSMListener;
56import org.greenstone.gatherer.util.DOMTree;
57import org.greenstone.gatherer.util.HashMap3D;
58import org.greenstone.gatherer.util.StaticStrings;
59import org.greenstone.gatherer.util.Utility;
60import org.w3c.dom.*;
61
62/** This object manages the metadata attached to file records. By storing all of the metadata in one place you garner several advantages. Firstly only one copy of each metadata object is retained, all actual entries are converted to references. Next you can immediately determine what metadata is assigned to an entire directory, thus the metadata.xml files can be built more efficiently (whereas the current 'optimal' method uses recursion through the tree contents). Finally, and perhaps most importantly, it allows for dynamic 'on demand' lookup of metadata. This is especially necessary with large collections, where the raw, unconnected metadata files could range into the tens of megabytes of memory and require hundreds of megabytes to read in using serialization. Dynamic loading allows you to connect the metadata objects on load, reducing value node paths (possibly of hundreds of characters) down to a single reference pointer! At the very worst this object uses far less memory than the current method, and given that the current method is completely incapable of handling large collections, is necessary. The trade-off of course is in the time needed to load metadata.xml on demand, the worst possible case being the user selecting the root node of the collection tree of a large collection immediately after opening the collection. The subsequent attempt to build the metadata table will result in the metadata being loaded for every single file. But since this process is sequential and a small cache of metadata.xml files is implemented, and given that the table will actually be built on a separate thread, the wait should not be too arduous.<BR>
63 As for the size of the MetadataXMLFileParser cache, I was at first tempted to put around five. However after analysis of cache usage, I determined that no gain occurred because of caching (in fact if everything's working as it should there should only ever be one call for a certain metadata.xml).
64*/
65public class MetadataXMLFileManager
66 extends LinkedHashMap
67 implements MSMListener {
68 /** A list of the known metadata instances, thus we only store one of each unique metadata, and reference the rest. */
69 static public HashMap3D metadata_cache = null;
70 private DOMTree tree = null;
71 private JComboBox documents_in_cache_combobox = null;
72 /** The root file node. */
73 private TreeNode root = null;
74 /** The threaded object responsible for loading all of the existing metadata.xml files prior to any save action. This is necessary so that hierarchy indexes within the metadata.xml files stay fresh. */
75 //private GDMLoader gdm_loader = null;
76 /** The maximum number of MetadataXMLFiles to load at any one time */
77 static final private int MAX_DOCUMENTS = 25;
78 /** Constructor. */
79 public MetadataXMLFileManager() {
80 super();
81 this.metadata_cache = new HashMap3D(Gatherer.c_man.getCollection().msm.getSize());
82 // Connect
83 Gatherer.c_man.getCollection().msm.addMSMListener(this);
84 // We also have a debug dialog
85 if(Gatherer.debug != null) {
86 display();
87 }
88 }
89
90 /** This may seem a little odd but this method doesn't add the given metadata directly, instead calling fireMetadataChanged in MetadataSetManager so as to recursively add metadata if necessary, and to ensure that all listeners who are interested in data change (such as the Metadata Table and Save listeners) can be up to date. */
91 public void addMetadata(FileNode node, ArrayList metadatum) {
92 for(int i = 0; i < metadatum.size(); i++) {
93 Metadata metadata = (Metadata) metadatum.get(i);
94 Gatherer.c_man.getCollection().msm.fireMetadataChanged(node, (Metadata)null, metadata);
95 }
96 }
97
98 /** Destructor necessary for clean exit, subsequent to saving of metadata.xml files.
99 * @see org.greenstone.gatherer.Gatherer
100 * @see org.greenstone.gatherer.collection.CollectionManager
101 * @see org.greenstone.gatherer.msm.MetadataXMLFileParser
102 * @see org.greenstone.gatherer.msm.MetadataSetManager
103 */
104 public void destroy() {
105 // Destroy all the cached documents.
106 /*
107 Iterator iterator = keySet().iterator();
108 while(iterator.hasNext()) {
109 File file = (File) iterator.next();
110 MetadataXMLFile document = (MetadataXMLFile) get(file);
111 document.destroy(file);
112 }
113 */
114 // Deregister as listener
115 Gatherer.c_man.getCollection().msm.removeMSMListener(this);
116 // Deallocate all data members
117 metadata_cache.clear();
118 metadata_cache = null;
119 // Finally clear self
120 clear();
121 // Done!
122 }
123
124 /** This debug facility shows the currently loaded collect.cfg or CollectConfig.xml file as a DOM tree. */
125 public void display() {
126 JDialog dialog = new JDialog(Gatherer.g_man, "Greenstone Metadata Document Manager", false);
127 dialog.setSize(400,400);
128 JPanel content_pane = (JPanel) dialog.getContentPane();
129 MetadataXMLFile metadata_xml = new MetadataXMLFile();
130 tree = new DOMTree(metadata_xml.getDocument());
131 metadata_xml = null;
132 documents_in_cache_combobox = new JComboBox();
133 documents_in_cache_combobox.addItemListener(new DOMItemListener());
134 JButton refresh_button = new GLIButton("Refresh Tree");
135 refresh_button.setMnemonic(KeyEvent.VK_R);
136 refresh_button.addActionListener(new ActionListener() {
137 public void actionPerformed(ActionEvent event) {
138 File metadata_file = (File) documents_in_cache_combobox.getSelectedItem();
139 if(metadata_file != null) {
140 MetadataXMLFile document = (MetadataXMLFile) get(metadata_file);
141 tree.setDocument(document.getDocument());
142 document = null;
143 metadata_file = null;
144 }
145 }
146 });
147 content_pane.setBorder(BorderFactory.createEmptyBorder(5,5,5,5));
148 content_pane.setLayout(new BorderLayout());
149 content_pane.add(documents_in_cache_combobox, BorderLayout.NORTH);
150 content_pane.add(new JScrollPane(tree), BorderLayout.CENTER);
151 content_pane.add(refresh_button, BorderLayout.SOUTH);
152 dialog.show();
153 }
154
155 private class DOMItemListener
156 implements ItemListener {
157
158 public void itemStateChanged(ItemEvent event) {
159 if(event.getStateChange() == ItemEvent.SELECTED) {
160 File metadata_file = (File) documents_in_cache_combobox.getSelectedItem();
161 if(metadata_file != null) {
162 MetadataXMLFile document = (MetadataXMLFile) get(metadata_file);
163 tree.setDocument(document.getDocument());
164 document = null;
165 metadata_file = null;
166 }
167 }
168 }
169 }
170
171 /** Method that is called whenever an element within a set is changed or modified. Ensure all cached MetadataXMLFiles are marked as stale.
172 * @param event A <strong>MSMEvent</strong> containing details of the event that caused this message to be fired.
173 */
174 public void elementChanged(MSMEvent event) {
175 for(Iterator values = values().iterator(); values.hasNext(); ) {
176 MetadataXMLFile document = (MetadataXMLFile) values.next();
177 document.setUpToDate(false);
178 document = null;
179 }
180 }
181 /** Retrieve the GreenstoneMetadataDocument that is associated with a certain file. If the document is in cache returns it. If the document exists but isn't in cache loads, caches, then returns it. Otherwise it creates a brand new document, caches it, then returns it.
182 * @see org.greenstone.gatherer.msm.MetadataXMLFileParser
183 */
184 public MetadataXMLFile getDocument(File file) {
185 ///ystem.err.println("Get the MetadataXMLFile for " + file.getAbsolutePath());
186 MetadataXMLFile metadata_xml = null;
187 // Determine the name of the target files metadata.xml file.
188 File metadata_file = null;
189 if(file.isFile()) {
190 metadata_file = new File(file.getParentFile(), StaticStrings.METADATA_XML);
191 }
192 else {
193 metadata_file = new File(file, StaticStrings.METADATA_XML);
194 }
195 // Then try to retrieve it from cache. First we consider the case of a cache hit.
196 if(containsKey(metadata_file)) {
197 metadata_xml = (MetadataXMLFile) get(metadata_file);
198 ///ystem.err.println("Get the MetadataXMLFile for " + metadata_file.getAbsolutePath());
199 }
200 else {
201 // Now the two potential cache misses. The first requires us to load an existing metadata.xml
202 if(metadata_file.exists()) {
203 ///ystem.err.println("Reload the MetadataXMLFile for " + metadata_file.getAbsolutePath());
204 metadata_xml = new MetadataXMLFile(metadata_file);
205 }
206 // The final case is where no current metadata.xml exists. Create a new one just by creating a new MetadataXMLFile.
207 else {
208 ///ystem.err.println("Create a new MetadataXMLFile for " + metadata_file.getAbsolutePath());
209 metadata_xml = new MetadataXMLFile();
210 }
211 put(metadata_file, metadata_xml);
212 if(documents_in_cache_combobox != null) {
213 documents_in_cache_combobox.addItem(metadata_file);
214 }
215 //Gatherer.println("[0ms]\tCached " + metadata_file);
216 }
217 return metadata_xml;
218 }
219
220
221 /** Recover the metadata associated with a particular file. Note that this call is synchronized, so that all of the data holders don't need to be. */
222 public synchronized ArrayList getMetadata(File file) {
223 return getMetadata(file, false, true);
224 }
225
226/** Recover the metadata associated with a particular file excluding folder level metadata. Note that this call is synchronized, so that all of the data holders don't need to be. */
227 public synchronized ArrayList getMetadataOnly(File file) {
228 return getMetadata(file, false, false);
229 }
230
231 public synchronized ArrayList getMetadata(File file, ElementWrapper element) {
232 ArrayList metadata = getMetadata(file, false, true);
233 ArrayList values = new ArrayList();
234 for(int i = 0; i < metadata.size(); i++) {
235 Metadata data = (Metadata) metadata.get(i);
236 if(element.equals(data.getElement())) {
237 values.add(data.getValue());
238 }
239 }
240 if(values.size() > 0) {
241 Collections.sort(values);
242 }
243 return values;
244 }
245
246 private ArrayList getMetadata(File file, boolean remove, boolean append_folder_level) {
247 ArrayList metadata = null;
248 String filename = null;
249 if(file.isFile()) {
250 filename = file.getName();
251 file = file.getParentFile();
252 }
253 MetadataXMLFile document = getDocument(file);
254 if(file != null && document != null) {
255 metadata = document.getMetadata(filename, remove, metadata, file, append_folder_level);
256 document = null;
257 }
258 return metadata;
259 }
260
261 public synchronized ArrayList getAllMetadata(File file) { // boolean remove) {
262 ///ystem.err.println("getMetadata(" + file.getAbsolutePath() + ")");
263 ArrayList metadata = null;
264 // Build up a list of all the metadata xml files we have to check for metadata.
265 ArrayList search_files = new ArrayList();
266 String filename = null;
267 File start_file = file;
268 if(file.isFile()) {
269 filename = file.getName();
270 start_file = file.getParentFile();
271 }
272 File collection_dir = new File(Gatherer.c_man.getCollectionDirectory());
273 ///ystem.err.println("Collection directory = " + collection_dir.getAbsolutePath());
274 ///ystem.err.println("Start directory = " + start_file.getAbsolutePath());
275 while(!start_file.equals(collection_dir)) {
276 ///ystem.err.println("Blip!");
277 search_files.add(0, new MetadataXMLFileSearch(start_file, filename));
278 if(filename != null) {
279 filename = start_file.getName() + File.separator + filename;
280 }
281 else {
282 filename = start_file.getName() + File.separator;
283 }
284 start_file = start_file.getParentFile();
285 ///ystem.err.println("Start directory = " + start_file.getAbsolutePath());
286 }
287 // Now search each of these metadata xml for metadata, remembering to accumulate or overwrite as we go along.
288 for(int i = 0; i < search_files.size(); i++) {
289 MetadataXMLFileSearch a_search = (MetadataXMLFileSearch) search_files.get(i);
290 ///ystem.err.println("Search " + a_search.file.getAbsolutePath() + File.separator + "metadata.xml for " + (a_search.filename != null ? a_search.filename : "directory metadata"));
291 // Retrieve the document
292 MetadataXMLFile document = getDocument(a_search.file);
293 if(document != null) {
294 // There is one piece of slight of hand here. You can never remove metadata during a get all metadata.
295 metadata = document.getMetadata(a_search.filename, false, metadata, a_search.file, true);
296 ///ystem.err.println("Current metadata: " + toString(metadata));
297 document = null;
298 }
299 a_search = null;
300 }
301 start_file = null;
302 collection_dir = null;
303 filename = null;
304 search_files.clear();
305 search_files = null;
306 return metadata;
307 }
308
309 public String toString(ArrayList list) {
310 StringBuffer text = new StringBuffer("(");
311 for(int i = 0; list != null && i < list.size(); i++) {
312 text.append((list.get(i)).toString());
313 if(i < list.size() - 1) {
314 text.append(", ");
315 }
316 }
317 text.append(")");
318 return text.toString();
319 }
320
321 private class MetadataXMLFileSearch {
322 public File file;
323 public String filename;
324 public MetadataXMLFileSearch(File file, String filename) {
325 this.file = file;
326 this.filename = filename;
327 }
328 }
329
330 /** Called whenever the metadata value changes in some way, such as the addition of a new value. This is the only event type we care about, but we care about it a lot. It tells us what metadata to add, remove, etc from the cached metadata.xml files. Note that this method is synchronized so that the data objects don't need to be.
331 * @param event A <strong>MSMEvent</strong> containing details of the event that caused this message to be fired.
332 * @see org.greenstone.gatherer.msm.MetadataXMLFile
333 * @see org.greenstone.gatherer.msm.Metadata
334 * @see org.greenstone.gatherer.util.HashMap3D
335 */
336 public synchronized void metadataChanged(MSMEvent event) {
337 // System.err.println("In MetadataXMLFileManager::metadataChanged(" + event + ")...");
338
339 File file = event.getFile();
340 if(file == null) {
341 FileNode record = event.getRecord();
342 file = record.getFile();
343 }
344 Metadata new_metadata = event.getNewMetadata();
345 Metadata old_metadata = event.getOldMetadata();
346 // These metadata objects may be new instances of metadata objects that already exist. Replace them if they are.
347 new_metadata = checkCache(new_metadata);
348 old_metadata = checkCache(old_metadata);
349 // Now apply the change to the document in question.
350 MetadataXMLFile metadata_xml = getDocument(file);
351 if(metadata_xml != null) {
352 if(old_metadata != null) {
353 // File level
354 if(file.isFile()) {
355 metadata_xml.removeMetadata(file.getName(), old_metadata);
356 }
357 // Folder level
358 else {
359 metadata_xml.removeMetadata(null, old_metadata);
360 }
361 }
362 if(new_metadata != null) {
363 // Convert relative paths into absolute paths
364 String filename = file.toString();
365 if ((Utility.isWindows() == true && filename.charAt(1) != ':') ||
366 (Utility.isWindows() == false && !filename.startsWith(File.separator))) {
367 String collection_dir = Utility.getCollectionDir();
368 File full_file = new File(collection_dir + filename);
369 file = full_file;
370 }
371
372 // File level
373 if(file.isFile()) {
374 // System.err.println("Calling metadata_xml.addMetadata(" + new_metadata + ")...");
375 metadata_xml.addMetadata(file.getName(), new_metadata, event.getAction() == MetaEditPrompt.ACCUMULATE);
376 }
377 else {
378 metadata_xml.addMetadata(null, new_metadata, event.getAction() == MetaEditPrompt.ACCUMULATE);
379 }
380 }
381 }
382 }
383
384 public void removeExtractedMetadata() {
385 try {
386 // Remove all of the extracted metadata in the collection
387 removeExtractedMetadata(new File(Gatherer.c_man.getCollectionImport()));
388 }
389 catch (Exception exception) {
390 Gatherer.println("Exception in MetadataXMLFileManager.removeExtractedMetadata - unexpected");
391 Gatherer.printStackTrace(exception);
392 }
393 }
394
395 private void removeExtractedMetadata(File file) {
396 // Retrieve the gdm document for this file
397 MetadataXMLFile metadata_xml_document = getDocument(file);
398 // Remove the extracted metadata
399 ///ystem.err.println("Removing the extracted metadata from the file: " + file);
400 metadata_xml_document.removeExtractedMetadata();
401 metadata_xml_document = null;
402 // Then recurse down the directory structure looking for other metadata.xml files
403 File child_files[] = file.listFiles();
404 for(int i = 0; i < child_files.length; i++) {
405 if(child_files[i].isDirectory()) {
406 removeExtractedMetadata(child_files[i]);
407 }
408 }
409 child_files = null;
410 }
411
412 public ArrayList removeMetadata(File file) {
413 return getMetadata(file, true, false);
414 }
415
416 /** Causes all currently loaded MetadataXMLFiles to write themselves out.
417 * @see org.greenstone.gatherer.msm.MetadataXMLFile
418 */
419 public void save() {
420 Iterator iterator = keySet().iterator();
421 while(iterator.hasNext()) {
422 File file = (File) iterator.next();
423 MetadataXMLFile document = (MetadataXMLFile) get(file);
424 save(file, document);
425 }
426 }
427
428 /** Used to cause the document associated with a particular file to write the latest copy of itself to disk. */
429 public void save(FileNode node) {
430 File file = node.getFile();
431 if(file != null && file.isFile()) {
432 MetadataXMLFile document = getDocument(file);
433 if (document != null && !document.isUpToDate()) {
434 File xml_file;
435 if(file.isFile()) {
436 xml_file = new File(file.getParentFile(), "metadata.xml");
437 }
438 else {
439 xml_file = new File(file, "metadata.xml");
440 }
441 save(xml_file, document);
442 xml_file = null;
443 }
444 document = null;
445 }
446 file = null;
447 }
448
449
450 /** Write out the latest copy of a certain document. */
451 public void save(File file, MetadataXMLFile document) {
452 if(!document.isUpToDate()) {
453 // First purge any old references.
454 document.cleanUpMetadataRefs();
455 // If there is no metadata in this document then don't write out a file. In fact delete any file that already exists.
456 boolean has_metadata = document.hasMetadata();
457 if (has_metadata) {
458 // Now write the xml
459 Utility.export(document.getDocument(), file);
460 }
461 else if(file.exists()) {
462 file.delete();
463 }
464 document.setUpToDate(true);
465 }
466 }
467
468
469 /** Method that is called whenever the metadata set collection changes in some way, such as the addition of a new set or the merging of two sets. If a set changes, mark all cached MetadataXMLFiles as being stale.
470 * @param event A <strong>MSMEvent</strong> containing details of the event that caused this message to be fired.
471 */
472 public void setChanged(MSMEvent event) {
473 for(Iterator values = values().iterator(); values.hasNext(); ) {
474 MetadataXMLFile document = (MetadataXMLFile) values.next();
475 document.setUpToDate(false);
476 document = null;
477 }
478 }
479 /** Called whenever the value tree of an metadata element changes in some way, such as the addition of a new value. --While the comments below are now obsolete, I'll keep them just to remind me of how easy it is to back yourself into a corner with issues such as caching and persisitant references--. Such an action would require us to painstakingly reload every metadata.xml file using the value model prior to the change, then painstakingly write out each metadata.xml file again using the modified model, but I'm a glutton for punishment so thats ok. The alternative is to not do this and watch in horror as heirarchy references quickly fall into disarray, pointing to the wrong place. This task gets even more complicated by three facts:<br>1. We want to do this is a seperate thread, as we don't want the program to come to a screaming halt while we're updating metadata.xml files.<br>2. We have to prevent any metadata.xml files being removed from cache while we're doing this, as if we encounter these more recently written files their heirarchy references will already be correct and that will balls up our little process. Note that this means the saving process may have to block while pending metadata heirarchy updates are in progress.<br>3. Regarding (2) we don't have to rewrite any metadata.xml files already in cache as they will be correctly written out whenever they happen to be dumped from cache.<br>4. We need the ability to pre-empt the general update to load a user demanded metadata.xml and store it in cache, using the old value tree model as per usual, and<br>5. We have to store a cue of these events, and process them one at a time. Perhaps one day when I'm feeling masacistic I'll figure out someway to merge several updates into one, but for now we have to change the tree one node at a time in order for references to remain correct.<br>Ok, so thats five facts, but you get the gist. 
Not an easy task, but crucial for accurate storage and recall of metadata heirarchies.
480 * @param event A <strong>MSMEvent</strong> containing details of the event that caused this message to be fired.
481 */
482 public void valueChanged(MSMEvent event) {}
483
484 public void waitUntilComplete() {
485 //if(gdm_loader != null) {
486 // gdm_loader.waitUntilComplete();
487 //}
488 }
489
490
491 private Metadata checkCache(Metadata metadata)
492 {
493 Gatherer.println("Checking cache for " + metadata + "...");
494
495 if (metadata == null) {
496 return null;
497 }
498
499 // Check if the metadata cache already contains this (element, value) pair
500 String element_name = metadata.getElement().toString();
501 if (metadata_cache.contains(element_name, metadata.getValue())) {
502 // If so, return the cached metadata value
503 Gatherer.println("In cache!");
504 return (Metadata) metadata_cache.get(element_name, metadata.getValue());
505 }
506
507 // Not cached, so return the original Metadata object
508 return metadata;
509 }
510
511
512 protected boolean removeEldestEntry(Map.Entry eldest) {
513 if(size() > MAX_DOCUMENTS) {
514 // Save the oldest document before its dumped
515 File file = (File) eldest.getKey();
516 ///ystem.err.println("Dumping oldest Document: " + file.getAbsolutePath());
517 MetadataXMLFile document = (MetadataXMLFile) eldest.getValue();
518 save(file, document);
519
520 if(documents_in_cache_combobox != null) {
521 documents_in_cache_combobox.removeItem(file);
522 }
523 // And then dump it
524 return true;
525 }
526 else {
527 return false;
528 }
529 }
530}
Note: See TracBrowser for help on using the repository browser.