source: trunk/gli/src/org/greenstone/gatherer/msm/GDMManager.java@ 6093

Last change on this file since 6093 was 6036, checked in by jmt12, 21 years ago

Removed the dummy code, and the GDMLoader, as legacy stuff is now handled in its own class

  • Property svn:keywords set to Author Date Id Revision
File size: 22.4 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.msm;
38
39import java.io.*;
40import java.util.*;
41import javax.swing.tree.*;
42import org.greenstone.gatherer.Gatherer;
43import org.greenstone.gatherer.file.FileNode;
44import org.greenstone.gatherer.gui.MetaEditPrompt;
45import org.greenstone.gatherer.msm.ElementWrapper;
46import org.greenstone.gatherer.msm.GDMDocument;
47import org.greenstone.gatherer.msm.GDMParser;
48import org.greenstone.gatherer.msm.Metadata;
49import org.greenstone.gatherer.msm.MSMEvent;
50import org.greenstone.gatherer.msm.MSMListener;
51import org.greenstone.gatherer.util.HashMap3D;
52import org.greenstone.gatherer.util.StaticStrings;
53import org.greenstone.gatherer.util.Utility;
54import org.w3c.dom.*;
55
56/** This object manages the metadata attached to file records. By storing all of the metadata in one place you garner several advantages. Firstly, only one copy of each metadata object is retained; all actual entries are converted to references. Next, you can immediately determine what metadata is assigned to an entire directory, thus the metadata.xml files can be built more efficiently (whereas the current 'optimal' method uses recursion through the tree contents). Finally, and perhaps most importantly, it allows for dynamic 'on demand' lookup of metadata. This is especially necessary with large collections, where the raw, unconnected metadata files could range into the tens of megabytes of memory and require hundreds of megabytes to read back in using serialization. Dynamic loading allows you to connect the metadata objects on load, reducing value node paths (possibly of hundreds of characters) down to a single reference pointer! At the very worst this object uses far less memory than the current method, and given that the current method is completely incapable of handling large collections, it is necessary. The trade-off of course is in the time needed to load metadata.xml on demand, the worst possible case being the user selecting the root node of the collection tree of a large collection immediately after opening the collection. The subsequent attempt to build the metadata table will result in the metadata being loaded for every single file. But since this process is sequential and a small cache of metadata.xml files is implemented, and given that the table will actually be built on a separate thread, the wait should not be too arduous.<BR>
57 As for the size of the GDMParser cache, I was at first tempted to put around five. However, after analysis of cache usage, I determined that no gain occurred because of caching (in fact, if everything's working as it should there should only ever be one call for a certain metadata.xml).
58*/
59public class GDMManager
60 extends LinkedHashMap
61 implements MSMListener {
62 /** A list of the known metadata instances, thus we only store one of each unique metadata, and reference the rest. */
63 static public HashMap3D metadata_cache = null;
64 /** The root file node. */
65 private TreeNode root = null;
66 /** The threaded object responsible for loading all of the existing metadata.xml files prior to any save action. This is necessary so that hierarchy indexes within the metadata.xml files stay fresh. */
67 //private GDMLoader gdm_loader = null;
68 /** The maximum number of GDMDocuments to load at any one time */
69 static final private int MAX_DOCUMENTS = 25;
/**
 * Constructor. Creates the shared metadata cache, passing it the value reported by the
 * current collection's metadata set manager ({@code msm.getSize()}), and then registers
 * this manager as a listener on that same set manager.
 */
public GDMManager() {
    super();
    // The cache is a static member, so it is assigned through the class rather than via 'this'.
    GDMManager.metadata_cache = new HashMap3D(Gatherer.c_man.getCollection().msm.getSize());
    // Connect to the metadata set manager so change events reach this object.
    Gatherer.c_man.getCollection().msm.addMSMListener(this);
}
77
78 /** This may seem a little odd but this method doesn't add the given metadata directly, instead calling fireMetadataChanged in MetadataSetManager so as to recursively add metadata if necessary, and to ensure that all listeners who are interested in data change (such as the Metadata Table and Save listeners) can be up to date. */
79 public void addMetadata(FileNode node, ArrayList metadatum) {
80 for(int i = 0; i < metadatum.size(); i++) {
81 Metadata metadata = (Metadata) metadatum.get(i);
82 Gatherer.c_man.getCollection().msm.fireMetadataChanged(node, (Metadata)null, metadata);
83 }
84 }
85
86 /** Destructor necessary for clean exit, subsequent to saving of metadata.xml files.
87 * @see org.greenstone.gatherer.Gatherer
88 * @see org.greenstone.gatherer.collection.CollectionManager
89 * @see org.greenstone.gatherer.msm.GDMParser
90 * @see org.greenstone.gatherer.msm.MetadataSetManager
91 */
92 public void destroy() {
93 // Destroy all the cached documents.
94 /*
95 Iterator iterator = keySet().iterator();
96 while(iterator.hasNext()) {
97 File file = (File) iterator.next();
98 GDMDocument document = (GDMDocument) get(file);
99 document.destroy(file);
100 }
101 */
102 // Deregister as listener
103 Gatherer.c_man.getCollection().msm.removeMSMListener(this);
104 // Deallocate all data members
105 metadata_cache.clear();
106 metadata_cache = null;
107 // Finally clear self
108 clear();
109 // Done!
110 }
111 /** Method that is called whenever an element within a set is changed or modified. Ensure all cached GDMDocuments are marked as stale.
112 * @param event A <strong>MSMEvent</strong> containing details of the event that caused this message to be fired.
113 */
114 public void elementChanged(MSMEvent event) {
115 for(Iterator values = values().iterator(); values.hasNext(); ) {
116 GDMDocument document = (GDMDocument) values.next();
117 document.setUpToDate(false);
118 document = null;
119 }
120 }
121 /** Retrieve the GreenstoneMetadataDocument that is associated with a certain file. If the document is in cache returns it. If the document exists but isn't in cache loads, caches, then returns it. Otherwise it creates a brand new document, caches it, then returns it.
122 * @see org.greenstone.gatherer.msm.GDMParser
123 */
124 public GDMDocument getDocument(File file) {
125 ///ystem.err.println("Get the GDMDocument for " + file.getAbsolutePath());
126 GDMDocument metadata_xml = null;
127 // Determine the name of the target files metadata.xml file.
128 File metadata_file = null;
129 if(file.isFile()) {
130 metadata_file = new File(file.getParentFile(), StaticStrings.METADATA_XML);
131 }
132 else {
133 metadata_file = new File(file, StaticStrings.METADATA_XML);
134 }
135 // Then try to retrieve it from cache. First we consider the case of a cache hit.
136 if(containsKey(metadata_file)) {
137 metadata_xml = (GDMDocument) get(metadata_file);
138 ///ystem.err.println("Get the GDMDocument for " + metadata_file.getAbsolutePath());
139 }
140 else {
141 // Now the two potential cache misses. The first requires us to load an existing metadata.xml
142 if(metadata_file.exists()) {
143 ///ystem.err.println("Reload the GDMDocument for " + metadata_file.getAbsolutePath());
144 metadata_xml = new GDMDocument(metadata_file);
145 }
146 // The final case is where no current metadata.xml exists. Create a new one just by creating a new GDMDocument.
147 else {
148 ///ystem.err.println("Create a new GDMDocument for " + metadata_file.getAbsolutePath());
149 metadata_xml = new GDMDocument();
150 }
151 put(metadata_file, metadata_xml);
152 //Gatherer.println("[0ms]\tCached " + metadata_file);
153 }
154 return metadata_xml;
155 }
156
157 /*
158 // returns metadata file from cache or creates a new one
159 public GDMDocument getDummyDocument(File file) {
160 ///ystem.err.println("Get the GDMDocument for " + file.getAbsolutePath());
161 GDMDocument metadata_xml = null;
162 // Determine the name of the target files metadata.xml file.
163 File metadata_file = null;
164 if(file.isFile()) {
165 metadata_file = new File(file.getParentFile(), StaticStrings.METADATA_XML);
166 }
167 else {
168 metadata_file = new File(file, StaticStrings.METADATA_XML);
169 }
170 // Then try to retrieve it from cache. First we consider the case of a cache hit.
171 if(containsKey(metadata_file)) {
172 metadata_xml = (GDMDocument) get(metadata_file);
173 }
174 else {
175 metadata_xml = new GDMDocument();
176 put(metadata_file, metadata_xml);
177 }
178 return metadata_xml;
179 }
180 */
181
182 /*
183 public synchronized void dummyGetMetadata(File file) {
184 String filename = null;
185 if(file.isFile()) {
186 filename = file.getName();
187 file = file.getParentFile();
188 }
189 GDMDocument document = getDummyDocument(file);
190 }
191 */
192
193 /** Recover the metadata associated with a particular file. Note that this call is synchronized, so that all of the data holders don't need to be. */
194 public synchronized ArrayList getMetadata(File file) {
195 return getMetadata(file, false, true);
196 }
197
198/** Recover the metadata associated with a particular file excluding folder level metadata. Note that this call is synchronized, so that all of the data holders don't need to be. */
199 public synchronized ArrayList getMetadataOnly(File file) {
200 return getMetadata(file, false, false);
201 }
202
203 public synchronized ArrayList getMetadata(File file, ElementWrapper element) {
204 ArrayList metadata = getMetadata(file, false, true);
205 ArrayList values = new ArrayList();
206 for(int i = 0; i < metadata.size(); i++) {
207 Metadata data = (Metadata) metadata.get(i);
208 if(element.equals(data.getElement())) {
209 values.add(data.getValue());
210 }
211 }
212 if(values.size() > 0) {
213 Collections.sort(values);
214 }
215 return values;
216 }
217
218 private ArrayList getMetadata(File file, boolean remove, boolean append_folder_level) {
219 ArrayList metadata = null;
220 String filename = null;
221 if(file.isFile()) {
222 filename = file.getName();
223 file = file.getParentFile();
224 }
225 GDMDocument document = getDocument(file);
226 if(document != null) {
227 metadata = document.getMetadata(filename, remove, metadata, file, append_folder_level);
228 document = null;
229 }
230 return metadata;
231 }
232
233 public synchronized ArrayList getAllMetadata(File file) { // boolean remove) {
234 ///ystem.err.println("getMetadata(" + file.getAbsolutePath() + ")");
235 ArrayList metadata = null;
236 // Build up a list of all the metadata xml files we have to check for metadata.
237 ArrayList search_files = new ArrayList();
238 String filename = null;
239 File start_file = file;
240 if(file.isFile()) {
241 filename = file.getName();
242 start_file = file.getParentFile();
243 }
244 File collection_dir = new File(Gatherer.c_man.getCollectionDirectory());
245 ///ystem.err.println("Collection directory = " + collection_dir.getAbsolutePath());
246 ///ystem.err.println("Start directory = " + start_file.getAbsolutePath());
247 while(!start_file.equals(collection_dir)) {
248 ///ystem.err.println("Blip!");
249 search_files.add(0, new MetadataXMLFileSearch(start_file, filename));
250 if(filename != null) {
251 filename = start_file.getName() + "/" + filename;
252 }
253 else {
254 filename = start_file.getName() + "/";
255 }
256 start_file = start_file.getParentFile();
257 ///ystem.err.println("Start directory = " + start_file.getAbsolutePath());
258 }
259 // Now search each of these metadata xml for metadata, remembering to accumulate or overwrite as we go along.
260 for(int i = 0; i < search_files.size(); i++) {
261 MetadataXMLFileSearch a_search = (MetadataXMLFileSearch) search_files.get(i);
262 ///ystem.err.println("Search " + a_search.file.getAbsolutePath() + File.separator + "metadata.xml for " + (a_search.filename != null ? a_search.filename : "directory metadata"));
263 // Retrieve the document
264 GDMDocument document = getDocument(a_search.file);
265 if(document != null) {
266 // There is one piece of slight of hand here. You can never remove metadata during a get all metadata.
267 metadata = document.getMetadata(a_search.filename, false, metadata, a_search.file, true);
268 ///ystem.err.println("Current metadata: " + toString(metadata));
269 document = null;
270 }
271 a_search = null;
272 }
273 start_file = null;
274 collection_dir = null;
275 filename = null;
276 search_files.clear();
277 search_files = null;
278 return metadata;
279 }
280
281 public String toString(ArrayList list) {
282 StringBuffer text = new StringBuffer("(");
283 for(int i = 0; list != null && i < list.size(); i++) {
284 text.append((list.get(i)).toString());
285 if(i < list.size() - 1) {
286 text.append(", ");
287 }
288 }
289 text.append(")");
290 return text.toString();
291 }
292
293 private class MetadataXMLFileSearch {
294 public File file;
295 public String filename;
296 public MetadataXMLFileSearch(File file, String filename) {
297 this.file = file;
298 this.filename = filename;
299 }
300 }
301
302 /** Called whenever the metadata value changes in some way, such as the addition of a new value. This is the only event type we care about, but we care about it a lot. It tells us what metadata to add, remove, etc from the cached metadata.xml files. Note that this method is synchronized so that the data objects don't need to be.
303 * @param event A <strong>MSMEvent</strong> containing details of the event that caused this message to be fired.
304 * @see org.greenstone.gatherer.msm.GDMDocument
305 * @see org.greenstone.gatherer.msm.Metadata
306 * @see org.greenstone.gatherer.util.HashMap3D
307 */
308 public synchronized void metadataChanged(MSMEvent event) {
309 ///ystem.err.println("Recieved Event: " + event.toString());
310 File file = event.getFile();
311 if(file == null) {
312 FileNode record = event.getRecord();
313 file = record.getFile();
314 }
315 Metadata new_metadata = event.getNewMetadata();
316 Metadata old_metadata = event.getOldMetadata();
317 // These metadata objects may be new instances of metadata objects that already exist. Replace them if they are.
318 new_metadata = checkCache(new_metadata);
319 old_metadata = checkCache(old_metadata);
320 // Now apply the change to the document in question.
321 GDMDocument metadata_xml = getDocument(file);
322 if(metadata_xml != null) {
323 if(old_metadata != null) {
324 // File level
325 if(file.isFile()) {
326 metadata_xml.removeMetadata(file.getName(), old_metadata);
327 }
328 // Folder level
329 else {
330 metadata_xml.removeMetadata(null, old_metadata);
331 }
332 }
333 if(new_metadata != null) {
334 // File level
335 if(file.isFile()) {
336 metadata_xml.addMetadata(file.getName(), new_metadata, event.getAction() == MetaEditPrompt.ACCUMULATE);
337 }
338 else {
339 metadata_xml.addMetadata(null, new_metadata, event.getAction() == MetaEditPrompt.ACCUMULATE);
340 }
341 }
342 }
343 }
344
345 public ArrayList removeMetadata(File file) {
346 return getMetadata(file, true, false);
347 }
348
349 /** Causes all currently loaded GDMDocuments to write themselves out.
350 * @see org.greenstone.gatherer.msm.GDMDocument
351 */
352 public void save() {
353 Iterator iterator = keySet().iterator();
354 while(iterator.hasNext()) {
355 File file = (File) iterator.next();
356 GDMDocument document = (GDMDocument) get(file);
357 if(!document.isUpToDate()) {
358 //ystem.err.println("Saving: " + file.getAbsolutePath());
359 // First purge any old references.
360 document.getMetadata(null, false, null, null, false, true);
361 // If there is no metadata in this document then don't write out a file. In fact delete any file that already exists.
362 int count = document.countMetadata();
363 if(count > 0) {
364 // Now write the xml
365 Utility.export(document.getDocument(), file);
366 document.setUpToDate(true);
367 }
368 else if(file.exists()) {
369 file.delete();
370 }
371 }
372 }
373 }
374 /** Used to cause the document associated with a particular file to write the latest copy of itself to disk. */
375 public void save(FileNode node) {
376 File file = node.getFile();
377 if(file != null && file.isFile()) {
378 GDMDocument document = getDocument(file);
379 File xml_file;
380 if(file.isFile()) {
381 xml_file = new File(file.getParentFile(), "metadata.xml");
382 }
383 else {
384 xml_file = new File(file, "metadata.xml");
385 }
386 if(document != null && !document.isUpToDate()) {
387 // First purge any old references.
388 document.getMetadata(null, false, null, null, true);
389 // Now write the xml
390 Utility.export(document.getDocument(), xml_file);
391 document.setUpToDate(true);
392 }
393 xml_file = null;
394 document = null;
395 }
396 file = null;
397 }
398
399 /** Write out the latest copy of a certain document. */
400 public void save(File file, GDMDocument document) {
401 if(!document.isUpToDate()) {
402 // First purge any old references.
403 document.getMetadata(null, false, null, null, true);
404 // Now write the xml
405 Utility.export(document.getDocument(), file);
406 document.setUpToDate(true);
407 }
408 }
409
410
411 /** Method that is called whenever the metadata set collection changes in some way, such as the addition of a new set or the merging of two sets. If a set changes, mark all cached GDMDocuments as being stale.
412 * @param event A <strong>MSMEvent</strong> containing details of the event that caused this message to be fired.
413 */
414 public void setChanged(MSMEvent event) {
415 for(Iterator values = values().iterator(); values.hasNext(); ) {
416 GDMDocument document = (GDMDocument) values.next();
417 document.setUpToDate(false);
418 document = null;
419 }
420 }
421 /** Called whenever the value tree of an metadata element changes in some way, such as the addition of a new value. --While the comments below are now obsolete, I'll keep them just to remind me of how easy it is to back yourself into a corner with issues such as caching and persisitant references--. Such an action would require us to painstakingly reload every metadata.xml file using the value model prior to the change, then painstakingly write out each metadata.xml file again using the modified model, but I'm a glutton for punishment so thats ok. The alternative is to not do this and watch in horror as heirarchy references quickly fall into disarray, pointing to the wrong place. This task gets even more complicated by three facts:<br>1. We want to do this is a seperate thread, as we don't want the program to come to a screaming halt while we're updating metadata.xml files.<br>2. We have to prevent any metadata.xml files being removed from cache while we're doing this, as if we encounter these more recently written files their heirarchy references will already be correct and that will balls up our little process. Note that this means the saving process may have to block while pending metadata heirarchy updates are in progress.<br>3. Regarding (2) we don't have to rewrite any metadata.xml files already in cache as they will be correctly written out whenever they happen to be dumped from cache.<br>4. We need the ability to pre-empt the general update to load a user demanded metadata.xml and store it in cache, using the old value tree model as per usual, and<br>5. We have to store a cue of these events, and process them one at a time. Perhaps one day when I'm feeling masacistic I'll figure out someway to merge several updates into one, but for now we have to change the tree one node at a time in order for references to remain correct.<br>Ok, so thats five facts, but you get the gist. 
Not an easy task, but crucial for accurate storage and recall of metadata heirarchies.
422 * @param event A <strong>MSMEvent</strong> containing details of the event that caused this message to be fired.
423 */
424 public void valueChanged(MSMEvent event) {}
425
426 public void waitUntilComplete() {
427 //if(gdm_loader != null) {
428 // gdm_loader.waitUntilComplete();
429 //}
430 }
431
432 private Metadata checkCache(Metadata metadata) {
433 if(metadata != null) {
434 ///ystem.err.println("Search for " + metadata.toString());
435 if(metadata_cache.contains(metadata.getElement(), metadata.getValueNode())) {
436 metadata = (Metadata) metadata_cache.get(metadata.getElement(), metadata.getValueNode());
437 }
438 }
439 return metadata;
440 }
441
442 /** A separately threaded class to load all of the current metadata.xml files. Note that files can still be loaded on demand if they're not already in the cache. Also provides the functionality to block any other thread until the loading is complete, such as is necessary when moving values about in the value tree hierarchy. */
443 /*
444 private class GDMLoader
445 extends Thread {
446 private boolean complete = false;
447 private boolean dummy_load = false;
448
449 GDMLoader(boolean dummy_load) {
450 super("blarg");
451 this.dummy_load = dummy_load;
452 }
453
454
455 public void run() {
456 // Can't open a collections metadata when the collection isn't open!
457 while(!Gatherer.c_man.ready()) {
458 try {
459 wait(100);
460 }
461 catch(Exception error) {
462 }
463 }
464 // Now for each non-file directory in the tree, ask it to load its metadata
465 ArrayList remaining = new ArrayList();
466 remaining.add((FileNode)Gatherer.c_man.getRecordSet().getRoot());
467 int remaining_size = 0;
468 while((remaining_size = remaining.size()) > 0) {
469 FileNode record = (FileNode) remaining.remove(remaining_size - 1);
470 if(!record.isLeaf()) {
471 ///atherer.println("Retrieving metadata.xml for " + record);
472 if (this.dummy_load) {
473 dummyGetMetadata(record.getFile());
474 } else {
475 getMetadata(record.getFile());
476 }
477 for(int i = 0; i < record.getChildCount(); i++) {
478 remaining.add(record.getChildAt(i));
479 }
480
481 ///atherer.println("Retrieving metadata.xml for " + record);
482 getMetadata(record.getFile());
483
484 record.unmap();
485 }
486 record = null;
487 }
488 remaining = null;
489 complete = true;
490 }
491 public void waitUntilComplete() {
492 try {
493 while(!complete) {
494 sleep(100); // 1 second hopefully.
495 }
496 }
497 catch(Exception error) {
498 Gatherer.printStackTrace(error);
499 }
500 }
501 }
502 */
503
504 protected boolean removeEldestEntry(Map.Entry eldest) {
505 if(size() > MAX_DOCUMENTS) {
506 // Save the oldest document before its dumped
507 File file = (File) eldest.getKey();
508 ///ystem.err.println("Dumping oldest Document: " + file.getAbsolutePath());
509 GDMDocument document = (GDMDocument) eldest.getValue();
510 save(file, document);
511 // And then dump it
512 return true;
513 }
514 else {
515 return false;
516 }
517 }
518}
Note: See TracBrowser for help on using the repository browser.