Changeset 5040
- Timestamp:
- 2003-07-25T16:44:43+12:00 (21 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gli/src/org/greenstone/gatherer/msm/parsers/GreenstoneMetadataParser.java
r4619 r5040 1 package org.greenstone.gatherer.msm.parsers;2 1 /** 3 2 *######################################################################### … … 7 6 * University of Waikato, New Zealand. 8 7 * 9 * <BR><BR>10 *11 8 * Author: John Thompson, Greenstone Digital Library, University of Waikato 12 9 * 13 * <BR><BR>14 *15 10 * Copyright (C) 1999 New Zealand Digital Library Project 16 *17 * <BR><BR>18 11 * 19 12 * This program is free software; you can redistribute it and/or modify … … 22 15 * (at your option) any later version. 23 16 * 24 * <BR><BR>25 *26 17 * This program is distributed in the hope that it will be useful, 27 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 28 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 29 20 * GNU General Public License for more details. 30 *31 * <BR><BR>32 21 * 33 22 * You should have received a copy of the GNU General Public License … … 36 25 *######################################################################## 37 26 */ 27 package org.greenstone.gatherer.msm.parsers; 28 /************************************************************************************** 29 * Written: ??/??/02 30 * Revised: ??/??/02 - Commented 31 * 25/07/03 - Fix to allow any valid greenstone metadata.xml to be imported from, not just those that occur within a collection. This functionality is accidental and caused by GLI attempting to find a collect.cfg to extract hierarchy file information from, and failing. 32 **************************************************************************************/ 38 33 import java.io.*; 39 34 import java.lang.ref.*; … … 41 36 import java.util.*; 42 37 import java.util.regex.*; 43 import javax.swing. 
JOptionPane;38 import javax.swing.*; 44 39 import javax.swing.tree.*; 45 40 import org.greenstone.gatherer.Gatherer; … … 55 50 import org.greenstone.gatherer.valuetree.GValueModel; 56 51 import org.greenstone.gatherer.valuetree.GValueNode; 57 import org.w3c.dom.Document; 58 import org.w3c.dom.Element; 59 import org.w3c.dom.Node; 60 import org.w3c.dom.NodeList; 52 import org.w3c.dom.*; 61 53 /** Provides a metadata parser implementation that knows how to locate, prepare for, then import metadata from a previous Greenstone collection. Is aware of such factors as the presence of Metadata Set files and hierarchy files. Updates the profiler where possible to allow for faster subsequent imports from a certain collection. Caches all the information about encountered collections in CollectCFG objects which are softly cached (ie are cached, but are reclaimed before an OutOfMemory exception would be thrown). 62 54 * @author John Thompson, Greenstone Digital Library, University of Waikato … … 137 129 } 138 130 139 // Continue only if we are sure this is a greenstone collection 140 if(collection_dir != null && collect_cfg != null) { 141 // 2. Attempt to merge in any mdses and make note of those that are successfully imported (by removing reference from collect.cfg). 142 ///ystem.err.print("2 "); 131 // 2. If a collection configuration file was found, attempt to merge in any mdses and make note of those that are successfully imported (by removing reference from collect.cfg). 132 ///ystem.err.print("2 "); 133 if(collect_cfg != null) { 143 134 ArrayList mdses = collect_cfg.getMetadataSets(); 144 135 for(int i = 0; i < mdses.size(); i++) { … … 148 139 mdses.clear(); 149 140 mdses = null; 150 151 // 3. Locate all of the metadata.xml files that may have an affect on the origin file. Make sure the metadata.xml closest to the origin files directory is last (to ensure property inheritance regarding accumulate/overwrite). 
152 ///ystem.err.print("3 "); 153 ArrayList search_files = new ArrayList(); 154 File file = origin.getFile(); 155 String filename = null; 156 boolean file_level; 157 if(file.isFile()) { 158 file_level = false; 141 } 142 143 // 3. Locate all of the metadata.xml files that may have an affect on the origin file. Make sure the metadata.xml closest to the origin files directory is last (to ensure property inheritance regarding accumulate/overwrite). 144 ///ystem.err.print("3 "); 145 ArrayList search_files = new ArrayList(); 146 File file = origin.getFile(); 147 String filename = null; 148 boolean file_level; 149 if(file.isFile()) { 150 file_level = false; 151 filename = file.getName(); 152 file = file.getParentFile(); 153 } 154 else { 155 file_level = true; 156 } 157 while(file != null && (collection_dir == null || !file.equals(collection_dir))) { 158 File test_file = new File(file, Utility.METADATA_XML); 159 if(test_file.exists()) { 160 search_files.add(0, new MetadataXMLFileSearch(test_file, filename)); 161 } 162 if(filename != null) { 163 filename = file.getName() + SEPARATOR + filename; 164 } 165 else { 159 166 filename = file.getName(); 160 file = file.getParentFile(); 161 } 162 else { 163 file_level = true; 164 } 165 while(!file.equals(collection_dir)) { 166 File test_file = new File(file, Utility.METADATA_XML); 167 if(test_file.exists()) { 168 search_files.add(0, new MetadataXMLFileSearch(test_file, filename)); 169 } 170 if(filename != null) { 171 filename = file.getName() + SEPARATOR + filename; 167 } 168 file = file.getParentFile(); 169 } 170 filename = null; 171 file = null; 172 // Start with an initially empty ArrayList of metadata 173 ArrayList metadata = new ArrayList(); 174 // Now search each of these metadata xml for metadata, remembering to accumulate or overwrite as we go along. 
175 for(int i = 0; i < search_files.size(); i++) { 176 MetadataXMLFileSearch a_search = (MetadataXMLFileSearch) search_files.get(i); 177 ///ystem.err.println("Search " + a_search.file.getAbsolutePath() + " for " + (a_search.filename != null ? a_search.filename : ".*")); 178 // Retrieve the document 179 BasicGDMDocument document = getDocument(a_search.file); 180 if(document != null) { 181 // If this is a dummy run, our original source file is actually the metadata.xml file and we retrieve all metadata for this collection, as if accumulated! 182 if(dummy_run) { 183 metadata = document.getAllMetadata(); 172 184 } 173 185 else { 174 filename = file.getName(); 175 } 176 file = file.getParentFile(); 177 } 178 filename = null; 179 file = null; 180 // Start with an initially empty ArrayList of metadata 181 ArrayList metadatum = new ArrayList(); 182 // Now search each of these metadata xml for metadata, remembering to accumulate or overwrite as we go along. 183 for(int i = 0; i < search_files.size(); i++) { 184 MetadataXMLFileSearch a_search = (MetadataXMLFileSearch) search_files.get(i); 185 ///ystem.err.println("Search " + a_search.file.getAbsolutePath() + " for " + (a_search.filename != null ? a_search.filename : ".*")); 186 // Retrieve the document 187 BasicGDMDocument document = getDocument(a_search.file); 188 if(document != null) { 189 // If this is a dummy run, our original source file is actually the metadata.xml file and we retrieve all metadata for this collection, as if accumulated! 
190 if(dummy_run) { 191 metadatum = document.getAllMetadata(); 192 } 193 else { 194 metadatum = document.getMetadata(a_search.filename, metadatum, folder_level); 195 } 196 document = null; 197 } 198 a_search = null; 199 } 200 search_files = null; 201 // Finally assign the metadata 202 ///ystem.err.println("Found " + metadatum.size() + " pieces of metadata for " + destination); 203 if(metadatum.size() > 0) { 204 addMetadata(destination, metadatum, collection_dir, collect_cfg, dummy_run); 205 } 206 } 207 else { 208 ///ystem.err.println("Not a greenstone collection (no collect.cfg found)."); 186 metadata = document.getMetadata(a_search.filename, metadata, folder_level); 187 } 188 document = null; 189 } 190 a_search = null; 191 } 192 search_files = null; 193 // Finally assign the metadata 194 ///ystem.err.println("Found " + metadata.size() + " pieces of metadata for " + destination); 195 if(metadata.size() > 0) { 196 addMetadata(origin, destination, metadata, collection_dir, collect_cfg, dummy_run); 209 197 } 210 198 return dialog_cancelled; … … 215 203 } 216 204 217 private void addMetadata(FileNode destination, ArrayList metadatum, File collection_dir, CollectCFG collect_cfg, boolean dummy_run) {205 private void addMetadata(FileNode origin, FileNode destination, ArrayList metadata, File collection_dir, CollectCFG collect_cfg, boolean dummy_run) { 218 206 // before we try to addMetadata, we need to check that there are some metadata sets for the collection - otherwise we cant add or import 219 207 Vector meta_sets = Gatherer.c_man.getCollection().msm.getSets(false); … … 230 218 ///ystem.err.print("6 "); 231 219 // Used in a complicated test later on. 
232 for(int i = 0; !dialog_cancelled && i < metadatum.size(); i++) { 233 BasicMetadata basic_metadata = (BasicMetadata) metadatum.get(i); 234 BasicMetadata metadata = (BasicMetadata) metadatum.get(i); 235 metadata.collection = collection_dir; 220 for(int i = 0; !dialog_cancelled && i < metadata.size(); i++) { 221 BasicMetadata basic_metadata = ((BasicMetadata) metadata.get(i)).copy(); 222 BasicMetadata metadatum = (BasicMetadata) metadata.get(i); 223 metadatum.collection = collection_dir; // May be null. Doesn't matter. 236 224 Metadata final_metadata = null; 237 225 // If this BasicMetadata already exists in the transform cache then we can save ourselves a lot of work. … … 242 230 if(final_metadata == null) { 243 231 ///ystem.err.println("No existing Metadata object for BasicMetadata: " + basic_metadata); 244 // 6a. Check if an hfile is associated with this metadata, and if so load it, cache it in the collection.cfg object, then resolve metadata value index. 245 HFile h_file = collect_cfg.getHFile(metadata.element); 246 if(h_file != null && !dummy_run) { 247 ///ystem.err.print(metadata.value + " maps to "); 248 metadata.value = h_file.getValue(metadata.value); 249 ///ystem.err.println(metadata.value); 250 } 251 h_file = null; 252 // 6b. Check if there is a profile regarding the current metadata. 253 ///ystem.err.println("Retrieve existing action: " + collection_dir.getAbsolutePath() + ", " + metadata.element); 254 if(Gatherer.c_man.getCollection().msm.profiler.containsAction(collection_dir.getAbsolutePath(), metadata.element)) { 255 String new_element_name = Gatherer.c_man.getCollection().msm.profiler.getAction(collection_dir.getAbsolutePath(), metadata.element); 256 ///ystem.err.println("Profile result = " + new_element_name); 257 if(new_element_name == null) { 258 metadata = null; 259 } 260 else { 261 metadata.element = new_element_name; 262 } 263 new_element_name = null; 264 } 265 ///atherer.println("Assigning metadata."); 266 if(metadata != null) { 232 // 6a. 
Check if an hfile is associated with this metadata, and if so load it, cache it in the collection.cfg object, then resolve metadata value index. Of course we can only do this if a collection configuration file was found in the first place. 233 if(collect_cfg != null) { 234 HFile h_file = collect_cfg.getHFile(metadatum.element); 235 if(h_file != null && !dummy_run) { 236 ///ystem.err.print(metadata.value + " maps to "); 237 metadatum.value = h_file.getValue(metadatum.value); 238 ///ystem.err.println(metadatum.value); 239 } 240 h_file = null; 241 } 242 // 6b. Check if there is a profile regarding the current metadata. The profile may be stored for the collection directory, or if no such directory is available, then try the ancestor folders of the origin file. 243 ///ystem.err.println("Retrieve existing action: " + collection_dir.getAbsolutePath() + ", " + metadatum.element); 244 if(collection_dir != null) { 245 // Note that the first test is whether a profile action exist, while the 'getAction' can return null as the profile action. 
246 if(Gatherer.c_man.getCollection().msm.profiler.containsAction(collection_dir.getAbsolutePath(), metadatum.element)) { 247 String new_element_name = Gatherer.c_man.getCollection().msm.profiler.getAction(collection_dir.getAbsolutePath(), metadatum.element); 248 ///ystem.err.println("Profile result = " + new_element_name); 249 if(new_element_name == null) { 250 metadatum = null; 251 } 252 else { 253 metadatum.element = new_element_name; 254 } 255 new_element_name = null; 256 } 257 } 258 else { 259 boolean found = false; 260 File current_folder = origin.getFile().getParentFile(); 261 while(!found && metadatum != null && current_folder != null) { 262 if(Gatherer.c_man.getCollection().msm.profiler.containsAction(current_folder.getAbsolutePath(), metadatum.element)) { 263 found = true; 264 String new_element_name = Gatherer.c_man.getCollection().msm.profiler.getAction(current_folder.getAbsolutePath(), metadatum.element); 265 ///ystem.err.println("Profile result = " + new_element_name); 266 if(new_element_name == null) { 267 metadatum = null; 268 } 269 else { 270 metadatum.element = new_element_name; 271 } 272 new_element_name = null; 273 } 274 else { 275 current_folder = current_folder.getParentFile(); 276 } 277 } 278 current_folder = null; 279 } 280 ///atherer.println("Assigning metadatum."); 281 if(metadatum != null) { 267 282 // 6c. Try to add metadata. If there is no matching metadata element: 268 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(metadata.element); 283 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(metadatum.element); 269 284 // Arg. The element returned may come from the Greenstone dls, which of course should never be involved during importing. To solve check the namespace isn't "" and if it is nullify the element. Nullify. NULLIFY, Bwuhahahaha... 270 285 if(element != null && element.getNamespace().equals("")) { … … 273 288 // 6ci. 
If no match exists, prompt the user to add/merge with specific metadata element. The user can also choose to ignore this metadata. 274 289 if(element == null) { 275 element = selectElement(metadata.element); 290 element = selectElement(metadatum.element); 276 291 if(!dialog_cancelled) { 277 292 // 6ciii. If either of the above work, remember to add to profile. 278 293 if(element == null) { 279 ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadata.element + ", null"); 280 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadata.element, null); 294 ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadatum.element + ", null"); 295 if(collection_dir != null) { 296 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadatum.element, null); 297 } 298 else { 299 Gatherer.c_man.getCollection().msm.profiler.addAction(origin.getFile().getParentFile().getAbsolutePath(), metadatum.element, null); 300 } 281 301 } 282 302 else { 283 ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadata.element + ", " + element.getName()); 284 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadata.element, element.getName()); 303 ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadatum.element + ", " + element.getName()); 304 if(collection_dir != null) { 305 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadatum.element, null); 306 } 307 else { 308 Gatherer.c_man.getCollection().msm.profiler.addAction(origin.getFile().getParentFile().getAbsolutePath(), metadatum.element, null); 309 } 285 310 } 286 311 } … … 292 317 if(model != null) { 293 318 // One little 'fix' for importing from the demo or dls files. 
The Title metadata found in the metadata.xml isn't used in preference for the automatically extracted Titles. However we want to use them, so we should remove '.*(<filename>)$' for a certain file <filename>. 294 String raw_value = metadata.value.trim(); 319 String raw_value = metadatum.value.trim(); 295 320 String filename_munged = destination.getFile().getName(); 296 321 int index = -1; … … 299 324 } 300 325 filename_munged = "(" + filename_munged + ")"; 301 ///atherer.println("Hack: filename = " + destination.getFile().getName() + ", munged = " + filename_munged + ", raw_value = " + raw_value); 302 326 if(raw_value.endsWith(filename_munged)) { 303 327 raw_value = (raw_value.substring(0, raw_value.length() - filename_munged.length())).trim(); … … 305 329 GValueNode node = model.addValue(raw_value); 306 330 final_metadata = new Metadata(element, node); 307 ///ystem.err.println("Adding final metadata: " + metadata.toString()); 331 ///ystem.err.println("Adding final metadata: " + metadatum.toString()); 308 332 node = null; 309 333 } … … 322 346 } 323 347 if(!dummy_run && final_metadata != null && !dialog_cancelled) { 324 final_metadata.setAccumulate(metadata.accumulates); 348 final_metadata.setAccumulate(metadatum.accumulates); 325 349 // Now we can finally add the metadata. 326 ///ystem.err.println("Adding Metadata: " + final_metadata); … … 329 353 // Otherwise there is no way to add this metadata. No value model no metadata value. 
330 354 final_metadata = null; 331 metadata = null; 355 metadatum = null; 332 356 } 333 357 } … … 623 647 } 624 648 649 public BasicMetadata copy() { 650 return new BasicMetadata(element, value, accumulates); 651 } 652 625 653 public int compareTo(Object other) { 626 654 return toString().compareTo(other.toString()); … … 632 660 public boolean equals(Object object) { 633 661 BasicMetadata other = (BasicMetadata) object; 634 if(collection != null) { 662 if(collection != null && other.collection != null) { 635 663 return (collection.equals(other.collection) && element.equals(other.element) && value.equals(other.value)); 636 664 }
Note:
See TracChangeset
for help on using the changeset viewer.