Changeset 4365 for trunk/gli/src/org/greenstone/gatherer/msm/parsers
- Timestamp:
- 2003-05-27T15:49:22+12:00 (21 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gli/src/org/greenstone/gatherer/msm/parsers/GreenstoneMetadataParser.java
r4316 r4365 62 62 */ 63 63 public class GreenstoneMetadataParser 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 64 extends LinkedHashMap 65 implements MetadataParser { 66 67 static final private int MAX_CFG_CACHE_SIZE = 10; 68 static final private int MAX_GDM_CACHE_SIZE = 10; 69 /** The default name and location for a collection configuration file (presuming that a collection file prefix will be added). */ 70 static final private String CONFIG_FILENAME = "etc" + File.separator + "collect.cfg"; 71 /** The pattern to match when searching for directory level assignments. */ 72 static final private String DIRECTORY_FILENAME = ".*"; 73 static final private String DIRECTORY_FILENAME_SUFFIX = "/.*"; 74 static final private String DESCRIPTION_ELEMENT = "Description"; 75 static final private String FILENAME_ELEMENT = "FileName"; 76 static final private String FILESET_ELEMENT = "FileSet"; 77 /** The name of a gdm file. */ 78 static final private String GIMPORT = "gimport"; 79 static final private String IMPORT = "import"; 80 static final private String METADATA_ELEMENT = "Metadata"; 81 static final private String METADATA_XML_FILENAME = "metadata.xml"; 82 static final private String MODE_ATTRIBUTE = "mode"; 83 static final private String NAME_ATTRIBUTE = "name"; 84 static final private String SEPARATOR = "/"; 85 86 /** A list of the collect.cfg paths that we should ignore. */ 87 private ArrayList ignore_list = new ArrayList(); 88 /** Has this process been cancelled. */ 89 private boolean dialog_cancelled = false; 90 /** A cache of previously parsed collection configuration files. */ 91 private CollectCFGCache cfg_cache = new CollectCFGCache(); 92 /** A mapping from BasicMetadata to their fully enabled Metadata incarnation. */ 93 private HashMap transform = new HashMap(); 94 95 /** Default constructor needed for dynamic class loading. */ 96 public GreenstoneMetadataParser() { 97 } 98 /** Locate and import any metadata parsed by this metadata parser given the file involved and its previous incarnation. */ 99 public boolean process(FileNode destination, FileNode origin, boolean folder_level, boolean dummy_run) { 100 ///atherer.println("GreenstoneMetadataParser: Process " + origin + ": "); 101 int counter = 0; 102 dialog_cancelled = false; 103 104 // 1. Determine what collection the file is in, and load/parse the appropriate collect.cfg. Cache collect.cfg object. 105 ///ystem.err.print("1 "); 106 // Start at the origin node file. If its a file get its parent directory. 107 File collection_dir = origin.getFile(); 108 if(collection_dir.isFile()) { 109 collection_dir = collection_dir.getParentFile(); 110 } 111 // We're currently in the importing directory so we'll go one more step up. 112 collection_dir = collection_dir.getParentFile(); 113 // We are looking for a directory which contains a etc/collect.cfg file and either an import or a gimport directory. 114 boolean found = false; 115 while(!found && collection_dir != null) { 116 File possible_cfg_file = new File(collection_dir, CONFIG_FILENAME); 117 File possible_gimport_directory = new File(collection_dir, GIMPORT); 118 File possible_import_directory = new File(collection_dir, IMPORT); 119 if(possible_cfg_file.exists() && (possible_gimport_directory.exists() || possible_import_directory.exists())) { 120 found = true; 121 ///ystem.err.println("Found greenstone collection at " + collection_dir.getAbsolutePath()); 122 } 123 else { 124 collection_dir = collection_dir.getParentFile(); 125 } 126 } 127 128 // Now retrieve the configuration file if there is one. 129 CollectCFG collect_cfg = null; 130 if(collection_dir != null) { 131 File collect_cfg_file = new File(collection_dir, CONFIG_FILENAME); 132 if(collect_cfg_file.exists()) { 133 collect_cfg = cfg_cache.get(collect_cfg_file); 134 } 135 } 136 137 // Continue only if we are sure this is a greenstone collection 138 if(collection_dir != null && collect_cfg != null) { 139 139 // 2. Attempt to merge in any mdses and make note of those that are successfully imported (by removing reference from collect.cfg). 140 140 ///ystem.err.print("2 "); 141 142 143 144 145 146 147 141 ArrayList mdses = collect_cfg.getMetadataSets(); 142 for(int i = 0; i < mdses.size(); i++) { 143 File mds_file = (File) mdses.get(i); 144 Gatherer.c_man.getCollection().msm.importMDS(mds_file, false); 145 } 146 mdses.clear(); 147 mdses = null; 148 148 149 149 // 3. Locate all of the metadata.xml files that may have an affect on the origin file. Make sure the metadata.xml closest to the origin files directory is last (to ensure property inheritance regarding accumulate/overwrite). 150 150 ///ystem.err.print("3 "); 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 151 ArrayList search_files = new ArrayList(); 152 File file = origin.getFile(); 153 String filename = null; 154 if(file.isFile()) { 155 filename = file.getName(); 156 file = file.getParentFile(); 157 } 158 while(!file.equals(collection_dir)) { 159 File test_file = new File(file, Utility.METADATA_XML); 160 if(test_file.exists()) { 161 search_files.add(0, new MetadataXMLFileSearch(test_file, filename)); 162 } 163 if(filename != null) { 164 filename = file.getName() + SEPARATOR + filename; 165 } 166 else { 167 filename = file.getName(); 168 } 169 file = file.getParentFile(); 170 } 171 filename = null; 172 file = null; 173 173 // Start with an initially empty ArrayList of metadata 174 174 ArrayList metadatum = new ArrayList(); 175 175 // Now search each of these metadata xml for metadata, remembering to accumulate or overwrite as we go along. 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 176 for(int i = 0; i < search_files.size(); i++) { 177 MetadataXMLFileSearch a_search = (MetadataXMLFileSearch) search_files.get(i); 178 ///ystem.err.println("Search " + a_search.file.getAbsolutePath() + " for " + (a_search.filename != null ? a_search.filename : ".*")); 179 // Retrieve the document 180 BasicGDMDocument document = getDocument(a_search.file); 181 if(document != null) { 182 // If this is a dummy run, our original source file is actually the metadata.xml file and we retrieve all metadata for this collection, as if accumulated! 183 if(dummy_run) { 184 metadatum = document.getAllMetadata(); 185 } 186 else { 187 metadatum = document.getMetadata(a_search.filename, metadatum, true); 188 } 189 document = null; 190 } 191 a_search = null; 192 } 193 search_files = null; 194 194 // Finally assign the metadata 195 195 ///ystem.err.println("Found " + metadatum.size() + " pieces of metadata for " + destination); 196 if(metadatum.size() > 0) { 197 addMetadata(destination, metadatum, collection_dir, collect_cfg, dummy_run); 196 if(metadatum.size() > 0) { 197 addMetadata(destination, metadatum, collection_dir, collect_cfg, dummy_run); 198 } 199 } 200 else { 201 ///ystem.err.println("Not a greenstone collection (no collect.cfg found)."); 202 } 203 return dialog_cancelled; 204 } 205 206 protected boolean removeEldestEntry(java.util.Map.Entry entry) { 207 return (size() > MAX_GDM_CACHE_SIZE); 208 } 209 210 private void addMetadata(FileNode destination, ArrayList metadatum, File collection_dir, CollectCFG collect_cfg, boolean dummy_run) { 211 ///ystem.err.print("6 "); 212 // Used in a complicated test later on. 213 for(int i = 0; !dialog_cancelled && i < metadatum.size(); i++) { 214 BasicMetadata basic_metadata = (BasicMetadata) metadatum.get(i); 215 BasicMetadata metadata = (BasicMetadata) metadatum.get(i); 216 metadata.collection = collection_dir; 217 Metadata final_metadata = null; 218 // If this BasicMetadata already exists in the transform cache then we can save ourselves a lot of work. 219 SoftReference reference = (SoftReference) transform.get(basic_metadata); 220 if(reference != null) { 221 final_metadata = (Metadata) reference.get(); 222 } 223 if(final_metadata == null) { 224 ///ystem.err.println("No existing Metadata object for BasicMetadata: " + basic_metadata); 225 // 6a. Check if an hfile is associated with this metadata, and if so load it, cache it in the collection.cfg object, then resolve metadata value index. 226 HFile h_file = collect_cfg.getHFile(metadata.element); 227 if(h_file != null && !dummy_run) { 228 ///ystem.err.print(metadata.value + " maps to "); 229 metadata.value = h_file.getValue(metadata.value); 230 ///ystem.err.println(metadata.value); 231 } 232 h_file = null; 233 // 6b. Check if there is a profile regarding the current metadata. 234 ///ystem.err.println("Retrieve existing action: " + collection_dir.getAbsolutePath() + ", " + metadata.element); 235 if(Gatherer.c_man.getCollection().msm.profiler.containsAction(collection_dir.getAbsolutePath(), metadata.element)) { 236 String new_element_name = Gatherer.c_man.getCollection().msm.profiler.getAction(collection_dir.getAbsolutePath(), metadata.element); 237 ///ystem.err.println("Profile result = " + new_element_name); 238 if(new_element_name == null) { 239 metadata = null; 240 } 241 else { 242 metadata.element = new_element_name; 243 } 244 new_element_name = null; 245 } 246 ///atherer.println("Assigning metadata."); 247 if(metadata != null) { 248 // 6c. Try to add metadata. If there is no matching metadata element: 249 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(metadata.element); 250 // Arg. The element returned may come from the Greenstone dls, which of course should never be involved during importing. To solve check the namespace isn't "" and if it is nullify the element. Nullify. NULLIFY, Bwuhahahaha... 251 if(element != null && element.getNamespace().equals("")) { 252 element = null; 253 } 254 // 6ci. If no match exists, prompt the user to add/merge with specific metadata element. The user can also choose to ignore this metadata. 255 if(element == null) { 256 element = selectElement(metadata.element); 257 if(!dialog_cancelled) { 258 // 6ciii. If either of the above work, remember to add to profile. 259 if(element == null) { 260 ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadata.element + ", null"); 261 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadata.element, null); 262 } 263 else { 264 ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadata.element + ", " + element.getName()); 265 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadata.element, element.getName()); 266 } 267 } 268 } 269 // - Add metadata 270 if(!dummy_run && element != null && !dialog_cancelled) { 271 ///ystem.err.println("Retrieve the value tree for " + element.toString()); 272 GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(element); 273 if(model != null) { 274 GValueNode node = model.addValue(metadata.value); 275 final_metadata = new Metadata(element, node); 276 ///ystem.err.println("Adding final metadata: " + metadata.toString()); 277 node = null; 278 } 279 model = null; 280 } 281 element = null; 282 } 283 // If we have successfully created a Metadata from the BasicMetadata, store it 284 if(final_metadata != null && !dialog_cancelled) { 285 transform.put(basic_metadata, new SoftReference(final_metadata)); 286 ///ystem.err.println("Add a Metadata object for BasicMetadata: " + basic_metadata); 287 } 288 } 289 else { 290 ///ystem.err.println("Found a Metadata object for BasicMetadata: " + basic_metadata); 291 } 292 if(!dummy_run && final_metadata != null && !dialog_cancelled) { 293 final_metadata.setAccumulate(metadata.accumulates); 294 // Now we can finally add the metadata. 295 ///ystem.err.println("Adding Metadata: " + final_metadata); 296 Gatherer.c_man.getCollection().msm.fireMetadataChanged(0, destination, null, final_metadata); 297 } 298 // Otherwise there is no way to add this metadata. No value model no metadata value. 299 final_metadata = null; 300 metadata = null; 301 } 302 } 303 304 /** Determine the different suffix between two string. 305 * @param base_str The base <strong>String</strong>, expected to be the short of the two strings provided. 306 * @param target_str The target <strong>String</strong>, whose differing suffix is returned. 307 * @return A <strong>String</strong> containing the suffix from target which is different from base. 308 */ 309 private String diff(String base_str, String target_str) { 310 StringTokenizer base_tokenizer = new StringTokenizer(base_str, File.separator); 311 StringTokenizer target_tokenizer = new StringTokenizer(target_str, File.separator); 312 String base = null; 313 String target = null; 314 while(base_tokenizer.hasMoreTokens() && (base = base_tokenizer.nextToken()).equals((target = target_tokenizer.nextToken()))) { 315 } 316 StringBuffer result = new StringBuffer(target); 317 while(target_tokenizer.hasMoreTokens()) { 318 result.append(File.separator); 319 result.append(target_tokenizer.nextToken()); 320 } 321 return result.toString(); 322 } 323 324 /** Retrieve the BasicGDMDocument found at the given file, or null if there is no such file or if it isn't a valid BasicGDMDocument. */ 325 private BasicGDMDocument getDocument(File file) { 326 ///ystem.err.println("Get Document at: " + file.getAbsolutePath()); 327 BasicGDMDocument document = null; 328 if(!ignore_list.contains(file) && file.exists()) { 329 // Check cache 330 SoftReference reference = (SoftReference) get(file); 331 if(reference != null) { 332 ///ystem.err.println("Hit!!"); 333 document = (BasicGDMDocument) reference.get(); 334 reference = null; 335 } 336 // If that didn't work try to parse in the document 337 if(document == null) { 338 ///ystem.err.println("Miss or stale reference."); 339 document = new BasicGDMDocument(file); 340 if(document.isValid()) { 341 put(file, new SoftReference(document)); 342 } 343 else { 344 ///ystem.err.println(file.getAbsolutePath() + " is not a valid GDM XML file."); 345 ignore_list.add(file); 346 document = null; 347 } 348 } 349 } 350 else { 351 ///ystem.err.println("Ignoring file or file doesn't exists."); 352 } 353 return document; 354 } 355 356 357 /** Display a prompt allowing a user to select a metadata element to attempt to force add/merge or ignore a metadata element to. For instance an old version of a metadata.xml from the DLS collection might have an assigned metadata value "Publisher=EC Courier", however Publisher won't automatically match to any metadata set. This prompt will be displayed, and some effort will be made to systematically locate the appropriate set. In this case this should be the DLS metadata set as dls.Publisher should be the closest match. Regardless the element selected is returned. 358 * @param element_name The name of the element we are trying to add, as a <strong>String</strong>. 359 * @return The <strong>ElementWrapper</strong> choosen by the user, or <i>null</i> to skip this metadata element. 360 */ 361 private ElementWrapper selectElement(String element_name) { 362 ElementWrapper result = Gatherer.c_man.getCollection().msm.prompt.selectElement(element_name); 363 dialog_cancelled = Gatherer.c_man.getCollection().msm.prompt.wasDialogCancelled(); 364 return result; 365 } 366 367 /** A 'basic' version of the more complete GDMDocument used elsewhere, this object provides the same functionality except that it doesn't use Metadata objects. These objects require live references to elements within the MetadataSetManager and GValueModels, but these may not yet exist (and indeed may never exist) for metadata parsed from metadata.xml's outside of our current collection. Thus this class returns a String (or an ArrayList of Strings) when asked for the metadata associated with a certain file. Also notice that this class provides no constructor method for creating a blank document, nor does it ever need a reference to the Gatherer.*/ 368 private class BasicGDMDocument 369 extends HashMap { 370 /** The document this class sources its data from. */ 371 private Document base_document; 372 /** This constructor takes the original document and parsed out and stores metadata with its association to filenames. */ 373 public BasicGDMDocument(File file) { 374 ///ystem.err.println("New BasicGDMDocument: " + file.getAbsolutePath()); 375 base_document = Utility.parse(file.getAbsolutePath(), false); 376 } 377 /** Retrieve all of the metadata in this file. */ 378 public ArrayList getAllMetadata() { 379 ArrayList metadatum = new ArrayList(); 380 // Don't search the cache as this would never have been added. 381 try { 382 // Retrieve the document element. 383 Element directorymetadata_element = base_document.getDocumentElement(); 384 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching. 385 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 386 for(int i = 0; i < fileset_elements.getLength(); i++) { 387 Element fileset_element = (Element) fileset_elements.item(i); 388 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT); 389 for(int j = 0; j < filename_elements.getLength(); j++) { 390 Element filename_element = (Element) filename_elements.item(j); 391 // If they match add all of the metadata found in the Description child element, overwriting any metadata with the same element 392 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 393 for(int k = 0; k < description_elements.getLength(); k++) { 394 Element description_element = (Element) description_elements.item(k); 395 NodeList metadata_elements = description_element.getElementsByTagName(METADATA_ELEMENT); 396 for(int l = 0; l < metadata_elements.getLength(); l++) { 397 Element metadata_element = (Element) metadata_elements.item(l); 398 String element = metadata_element.getAttribute(NAME_ATTRIBUTE); 399 BasicMetadata metadata = new BasicMetadata(element, Utility.METADATA_XML, true); 400 // Remove any previous values for this metadata element. 401 for(int m = metadatum.size() - 1; m >= 0; m--) { 402 BasicMetadata old_metadata = (BasicMetadata) metadatum.get(m); 403 if(old_metadata.element.equals(element)) { 404 metadatum.remove(m); 405 } 406 old_metadata = null; 198 407 } 199 } 200 else { 201 ///ystem.err.println("Not a greenstone collection (no collect.cfg found)."); 202 } 203 return dialog_cancelled; 204 } 205 206 protected boolean removeEldestEntry(java.util.Map.Entry entry) { 207 return (size() > MAX_GDM_CACHE_SIZE); 208 } 209 210 private void addMetadata(FileNode destination, ArrayList metadatum, File collection_dir, CollectCFG collect_cfg, boolean dummy_run) { 211 ///ystem.err.print("6 "); 212 // Used in a complicated test later on. 213 for(int i = 0; !dialog_cancelled && i < metadatum.size(); i++) { 214 BasicMetadata basic_metadata = (BasicMetadata) metadatum.get(i); 215 BasicMetadata metadata = (BasicMetadata) metadatum.get(i); 216 metadata.collection = collection_dir; 217 Metadata final_metadata = null; 218 // If this BasicMetadata already exists in the transform cache then we can save ourselves a lot of work. 219 SoftReference reference = (SoftReference) transform.get(basic_metadata); 220 if(reference != null) { 221 final_metadata = (Metadata) reference.get(); 408 // Add the completed metadata and clean up 409 metadatum.add(metadata); 410 metadata = null; 411 element = null; 412 metadata_element = null; 413 } 414 metadata_elements = null; 415 description_element = null; 416 } 417 description_elements = null; 418 filename_element = null; 419 } 420 filename_elements = null; 421 fileset_element = null; 422 } 423 fileset_elements = null; 424 directorymetadata_element = null; 425 } 426 catch (Exception error) { 427 Gatherer.self.printStackTrace(error); 428 } 429 return metadatum; 430 } 431 432 /** Retrieve any metadata associated with a certain file. If filename is null we are attempting to find directory level metadata. */ 433 public ArrayList getMetadata(String filename, ArrayList metadatum_so_far, boolean folder_level) { 434 ///ystem.err.println("Retrieving metadata for: " + filename); 435 ArrayList metadatum = null; 436 // We start by attempting to retrieve this metadata from the cache. 437 if(filename != null) { 438 metadatum = (ArrayList) get(filename); 439 } 440 else { 441 metadatum = (ArrayList) get(DIRECTORY_FILENAME); 442 } 443 // If that failed we consult the document for metadata. 444 if(metadatum == null) { 445 metadatum = new ArrayList(); 446 if(metadatum_so_far == null) { 447 metadatum = new ArrayList(); 448 } 449 else { 450 metadatum = metadatum_so_far; 451 } 452 try { 453 // Retrieve the document element. 454 Element directorymetadata_element = base_document.getDocumentElement(); 455 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching. 456 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 457 for(int i = 0; i < fileset_elements.getLength(); i++) { 458 Element fileset_element = (Element) fileset_elements.item(i); 459 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT); 460 for(int j = 0; j < filename_elements.getLength(); j++) { 461 Element filename_element = (Element) filename_elements.item(j); 462 String filename_text = MSMUtils.getValue(filename_element); 463 // Crappy. There are apparently two ways of assigning, say, directory level metadata to anything in the ac01ne directory from a parent directories metadata.xml. 464 // The developers guide way: ac01ne/.* 465 // The dls way: ac01ne 466 // So the three tests are 467 //System.err.println("Check for: " + (filename != null ? filename : ".*")); 468 //System.err.println("Folder level = " + folder_level); 469 //System.err.println("filename != null && '" + filename + "'.matches('" + filename_text + "') = " + (filename != null ? filename.matches(filename_text) : false)); 470 //System.err.println("filename != null && '" + filename + "'.matches('" + filename_text + DIRECTORY_FILENAME_SUFFIX + "') = " + (filename != null ? filename.matches(filename_text + DIRECTORY_FILENAME_SUFFIX) : false)); 471 //System.err.println("filename == null && '" + filename_text + "'.equals('.*') = " + (filename == null ? filename_text.equals(DIRECTORY_FILENAME) : false)); 472 if((filename != null && (filename.matches(filename_text) || filename.matches(filename_text))) 473 || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) { 474 ///ystem.err.println("Match: " + (filename != null ? filename : ".*") + " => " + filename_text); 475 // If they match add all of the metadata found in the Description child element, remembering to abide by desired mode (accumulate vs. overwrite). 476 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 477 for(int k = 0; k < description_elements.getLength(); k++) { 478 Element description_element = (Element) description_elements.item(k); 479 NodeList metadata_elements = description_element.getElementsByTagName(METADATA_ELEMENT); 480 for(int l = 0; l < metadata_elements.getLength(); l++) { 481 Element metadata_element = (Element) metadata_elements.item(l); 482 String element = metadata_element.getAttribute(NAME_ATTRIBUTE); 483 ///ystem.err.println("Found element: " + element); 484 //String language = metadata_element.getAttribute("language"); 485 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE); 486 // Add the new metadata to our list of metadata for this target file. 487 String value = Utility.stripNL(MSMUtils.getValue(metadata_element)); 488 ///ystem.err.println("Found value: " + element); 489 BasicMetadata metadata = new BasicMetadata(element, value, mode.equals("accumulate")); 490 // If mode is overwrite, then remove any previous values for this metadata element. 491 if(!metadata.accumulates) { 492 for(int m = metadatum.size() - 1; m >= 0; m--) { 493 BasicMetadata old_metadata = (BasicMetadata) metadatum.get(m); 494 if(old_metadata.element.equals(element)) { 495 metadatum.remove(m); 496 } 497 old_metadata = null; 498 } 499 } 500 mode = null; 501 502 // Add the completed metadata and clean up 503 metadatum.add(metadata); 504 metadata = null; 505 value = null; 506 element = null; 507 metadata_element = null; 508 } 509 metadata_elements = null; 510 description_element = null; 222 511 } 223 if(final_metadata == null) { 224 ///ystem.err.println("No existing Metadata object for BasicMetadata: " + basic_metadata); 225 // 6a. Check if an hfile is associated with this metadata, and if so load it, cache it in the collection.cfg object, then resolve metadata value index. 226 HFile h_file = collect_cfg.getHFile(metadata.element); 227 if(h_file != null && !dummy_run) { 228 ///ystem.err.print(metadata.value + " maps to "); 229 metadata.value = h_file.getValue(metadata.value); 230 ///ystem.err.println(metadata.value); 231 } 232 h_file = null; 233 // 6b. Check if there is a profile regarding the current metadata. 234 ///ystem.err.println("Retrieve existing action: " + collection_dir.getAbsolutePath() + ", " + metadata.element); 235 if(Gatherer.c_man.getCollection().msm.profiler.containsAction(collection_dir.getAbsolutePath(), metadata.element)) { 236 String new_element_name = Gatherer.c_man.getCollection().msm.profiler.getAction(collection_dir.getAbsolutePath(), metadata.element); 237 ///ystem.err.println("Profile result = " + new_element_name); 238 if(new_element_name == null) { 239 metadata = null; 240 } 241 else { 242 metadata.element = new_element_name; 243 } 244 new_element_name = null; 245 } 246 ///atherer.println("Assigning metadata."); 247 if(metadata != null) { 248 // 6c. Try to add metadata. If there is no matching metadata element: 249 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(metadata.element); 250 // Arg. The element returned may come from the Greenstone dls, which of course should never be involved during importing. To solve check the namespace isn't "" and if it is nullify the element. Nullify. NULLIFY, Bwuhahahaha... 251 if(element != null && element.getNamespace().equals("")) { 252 element = null; 253 } 254 // 6ci. If no match exists, prompt the user to add/merge with specific metadata element. The user can also choose to ignore this metadata. 255 if(element == null) { 256 element = selectElement(metadata.element); 257 if(!dialog_cancelled) { 258 // 6ciii. If either of the above work, remember to add to profile. 259 if(element == null) { 260 ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadata.element + ", null"); 261 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadata.element, null); 262 } 263 else { 264 ///ystem.err.println("Adding profile action: " + collection_dir.getAbsolutePath() + ", " + metadata.element + ", " + element.getName()); 265 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_dir.getAbsolutePath(), metadata.element, element.getName()); 266 } 267 } 268 } 269 // - Add metadata 270 if(!dummy_run && element != null && !dialog_cancelled) { 271 ///ystem.err.println("Retrieve the value tree for " + element.toString()); 272 GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(element); 273 if(model != null) { 274 GValueNode node = model.addValue(metadata.value); 275 final_metadata = new Metadata(element, node); 276 ///ystem.err.println("Adding final metadata: " + metadata.toString()); 277 node = null; 278 } 279 model = null; 280 } 281 element = null; 282 } 283 // If we have successfully created a Metadata from the BasicMetadata, store it 284 if(final_metadata != null && !dialog_cancelled) { 285 transform.put(basic_metadata, new SoftReference(final_metadata)); 286 ///ystem.err.println("Add a Metadata object for BasicMetadata: " + basic_metadata); 287 } 288 } 289 else { 290 ///ystem.err.println("Found a Metadata object for BasicMetadata: " + basic_metadata); 291 } 292 if(!dummy_run && final_metadata != null && !dialog_cancelled) { 293 final_metadata.setAccumulate(metadata.accumulates); 294 // Now we can finally add the metadata. 295 ///ystem.err.println("Adding Metadata: " + final_metadata); 296 Gatherer.c_man.getCollection().msm.fireMetadataChanged(0, destination, null, final_metadata); 297 } 298 // Otherwise there is no way to add this metadata. No value model no metadata value. 299 final_metadata = null; 300 metadata = null; 301 } 302 } 303 304 /** Determine the different suffix between two string. 305 * @param base_str The base <strong>String</strong>, expected to be the short of the two strings provided. 306 * @param target_str The target <strong>String</strong>, whose differing suffix is returned. 307 * @return A <strong>String</strong> containing the suffix from target which is different from base. 308 */ 309 private String diff(String base_str, String target_str) { 310 StringTokenizer base_tokenizer = new StringTokenizer(base_str, File.separator); 311 StringTokenizer target_tokenizer = new StringTokenizer(target_str, File.separator); 312 String base = null; 313 String target = null; 314 while(base_tokenizer.hasMoreTokens() && (base = base_tokenizer.nextToken()).equals((target = target_tokenizer.nextToken()))) { 315 } 316 StringBuffer result = new StringBuffer(target); 317 while(target_tokenizer.hasMoreTokens()) { 318 result.append(File.separator); 319 result.append(target_tokenizer.nextToken()); 320 } 321 return result.toString(); 322 } 323 324 /** Retrieve the BasicGDMDocument found at the given file, or null if there is no such file or if it isn't a valid BasicGDMDocument. */ 325 private BasicGDMDocument getDocument(File file) { 326 ///ystem.err.println("Get Document at: " + file.getAbsolutePath()); 327 BasicGDMDocument document = null; 328 if(!ignore_list.contains(file) && file.exists()) { 329 // Check cache 330 SoftReference reference = (SoftReference) get(file); 331 if(reference != null) { 332 ///ystem.err.println("Hit!!"); 333 document = (BasicGDMDocument) reference.get(); 334 reference = null; 335 } 336 // If that didn't work try to parse in the document 337 if(document == null) { 338 ///ystem.err.println("Miss or stale reference."); 339 document = new BasicGDMDocument(file); 340 if(document.isValid()) { 341 put(file, new SoftReference(document)); 342 } 343 else { 344 ///ystem.err.println(file.getAbsolutePath() + " is not a valid GDM XML file."); 345 ignore_list.add(file); 346 document = null; 347 } 348 } 349 } 350 else { 351 ///ystem.err.println("Ignoring file or file doesn't exists."); 352 } 353 return document; 354 } 355 356 357 /** Display a prompt allowing a user to select a metadata element to attempt to force add/merge or ignore a metadata element to. For instance an old version of a metadata.xml from the DLS collection might have an assigned metadata value "Publisher=EC Courier", however Publisher won't automatically match to any metadata set. This prompt will be displayed, and some effort will be made to systematically locate the appropriate set. In this case this should be the DLS metadata set as dls.Publisher should be the closest match. Regardless the element selected is returned. 358 * @param element_name The name of the element we are trying to add, as a <strong>String</strong>. 359 * @return The <strong>ElementWrapper</strong> choosen by the user, or <i>null</i> to skip this metadata element. 360 */ 361 private ElementWrapper selectElement(String element_name) { 362 ElementWrapper result = Gatherer.c_man.getCollection().msm.prompt.selectElement(element_name); 363 dialog_cancelled = Gatherer.c_man.getCollection().msm.prompt.wasDialogCancelled(); 364 return result; 365 } 366 367 /** A 'basic' version of the more complete GDMDocument used elsewhere, this object provides the same functionality except that it doesn't use Metadata objects. These objects require live references to elements within the MetadataSetManager and GValueModels, but these may not yet exist (and indeed may never exist) for metadata parsed from metadata.xml's outside of our current collection. Thus this class returns a String (or an ArrayList of Strings) when asked for the metadata associated with a certain file. Also notice that this class provides no constructor method for creating a blank document, nor does it ever need a reference to the Gatherer.*/ 368 private class BasicGDMDocument 369 extends HashMap { 370 /** The document this class sources its data from. */ 371 private Document base_document; 372 /** This constructor takes the original document and parsed out and stores metadata with its association to filenames. */ 373 public BasicGDMDocument(File file) { 374 ///ystem.err.println("New BasicGDMDocument: " + file.getAbsolutePath()); 375 base_document = Utility.parse(file.getAbsolutePath(), false); 376 } 377 /** Retrieve all of the metadata in this file. */ 378 public ArrayList getAllMetadata() { 379 ArrayList metadatum = new ArrayList(); 380 // Don't search the cache as this would never have been added. 381 try { 382 // Retrieve the document element. 383 Element directorymetadata_element = base_document.getDocumentElement(); 384 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching. 385 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 386 for(int i = 0; i < fileset_elements.getLength(); i++) { 387 Element fileset_element = (Element) fileset_elements.item(i); 388 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT); 389 for(int j = 0; j < filename_elements.getLength(); j++) { 390 Element filename_element = (Element) filename_elements.item(j); 391 // If they match add all of the metadata found in the Description child element, overwriting any metadata with the same element 392 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 393 for(int k = 0; k < description_elements.getLength(); k++) { 394 Element description_element = (Element) description_elements.item(k); 395 NodeList metadata_elements = description_element.getElementsByTagName(METADATA_ELEMENT); 396 for(int l = 0; l < metadata_elements.getLength(); l++) { 397 Element metadata_element = (Element) metadata_elements.item(l); 398 String element = metadata_element.getAttribute(NAME_ATTRIBUTE); 399 BasicMetadata metadata = new BasicMetadata(element, Utility.METADATA_XML, true); 400 // Remove any previous values for this metadata element. 401 for(int m = metadatum.size() - 1; m >= 0; m--) { 402 BasicMetadata old_metadata = (BasicMetadata) metadatum.get(m); 403 if(old_metadata.element.equals(element)) { 404 metadatum.remove(m); 405 } 406 old_metadata = null; 407 } 408 // Add the completed metadata and clean up 409 metadatum.add(metadata); 410 metadata = null; 411 element = null; 412 metadata_element = null; 413 } 414 metadata_elements = null; 415 description_element = null; 416 } 417 description_elements = null; 418 filename_element = null; 419 } 420 filename_elements = null; 421 fileset_element = null; 422 } 423 fileset_elements = null; 424 directorymetadata_element = null; 425 } 426 catch (Exception error) { 427 Gatherer.self.printStackTrace(error); 428 } 429 return metadatum; 430 } 512 description_elements = null; 513 } 514 else { 515 ///ystem.err.println("No Match!"); 516 } 517 filename_text = null; 518 filename_element = null; 519 } 520 filename_elements = null; 521 fileset_element = null; 522 } 523 fileset_elements = null; 524 directorymetadata_element = null; 525 } 526 catch (Exception error) { 527 Gatherer.self.printStackTrace(error); 528 } 529 // Cache the result, given that these external metadata.xmls are taken to be static at the time of reading (if you happen to be sourcing information from a opened collection that someone is working on, too bad. 530 if(filename != null) { 531 put(filename, metadatum); 532 } 533 else { 534 put(DIRECTORY_FILENAME, metadatum); 535 } 536 } 537 return metadatum; 538 } 431 539 432 /** Retrieve any metadata associated with a certain file. If filename is null we are attempting to find directory level metadata. */ 433 public ArrayList getMetadata(String filename, ArrayList metadatum_so_far, boolean folder_level) { 434 ///ystem.err.println("Retrieving metadata for: " + filename); 435 ArrayList metadatum = null; 436 // We start by attempting to retrieve this metadata from the cache. 437 if(filename != null) { 438 metadatum = (ArrayList) get(filename); 439 } 440 else { 441 metadatum = (ArrayList) get(DIRECTORY_FILENAME); 442 } 443 // If that failed we consult the document for metadata. 444 if(metadatum == null) { 445 metadatum = new ArrayList(); 446 if(metadatum_so_far == null) { 447 metadatum = new ArrayList(); 448 } 449 else { 450 metadatum = metadatum_so_far; 451 } 452 try { 453 // Retrieve the document element. 454 Element directorymetadata_element = base_document.getDocumentElement(); 455 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching. 456 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 457 for(int i = 0; i < fileset_elements.getLength(); i++) { 458 Element fileset_element = (Element) fileset_elements.item(i); 459 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT); 460 for(int j = 0; j < filename_elements.getLength(); j++) { 461 Element filename_element = (Element) filename_elements.item(j); 462 String filename_text = MSMUtils.getValue(filename_element); 463 // Crappy. There are apparently two ways of assigning, say, directory level metadata to anything in the ac01ne directory from a parent directories metadata.xml. 464 // The developers guide way: ac01ne/.* 465 // The dls way: ac01ne 466 // So the three tests are 467 //System.err.println("Check for: " + (filename != null ? filename : ".*")); 468 //System.err.println("Folder level = " + folder_level); 469 //System.err.println("filename != null && '" + filename + "'.matches('" + filename_text + "') = " + (filename != null ? filename.matches(filename_text) : false)); 470 //System.err.println("filename != null && '" + filename + "'.matches('" + filename_text + DIRECTORY_FILENAME_SUFFIX + "') = " + (filename != null ? filename.matches(filename_text + DIRECTORY_FILENAME_SUFFIX) : false)); 471 //System.err.println("filename == null && '" + filename_text + "'.equals('.*') = " + (filename == null ? filename_text.equals(DIRECTORY_FILENAME) : false)); 472 if((filename != null && (filename.matches(filename_text) || filename.matches(filename_text))) 473 || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) { 474 ///ystem.err.println("Match: " + (filename != null ? filename : ".*") + " => " + filename_text); 475 // If they match add all of the metadata found in the Description child element, remembering to abide by desired mode (accumulate vs. overwrite). 476 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 477 for(int k = 0; k < description_elements.getLength(); k++) { 478 Element description_element = (Element) description_elements.item(k); 479 NodeList metadata_elements = description_element.getElementsByTagName(METADATA_ELEMENT); 480 for(int l = 0; l < metadata_elements.getLength(); l++) { 481 Element metadata_element = (Element) metadata_elements.item(l); 482 String element = metadata_element.getAttribute(NAME_ATTRIBUTE); 483 ///ystem.err.println("Found element: " + element); 484 //String language = metadata_element.getAttribute("language"); 485 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE); 486 // Add the new metadata to our list of metadata for this target file. 487 String value = Utility.stripNL(MSMUtils.getValue(metadata_element)); 488 ///ystem.err.println("Found value: " + element); 489 BasicMetadata metadata = new BasicMetadata(element, value, mode.equals("accumulate")); 490 // If mode is overwrite, then remove any previous values for this metadata element. 491 if(!metadata.accumulates) { 492 for(int m = metadatum.size() - 1; m >= 0; m--) { 493 BasicMetadata old_metadata = (BasicMetadata) metadatum.get(m); 494 if(old_metadata.element.equals(element)) { 495 metadatum.remove(m); 496 } 497 old_metadata = null; 498 } 499 } 500 mode = null; 501 502 // Add the completed metadata and clean up 503 metadatum.add(metadata); 504 metadata = null; 505 value = null; 506 element = null; 507 metadata_element = null; 508 } 509 metadata_elements = null; 510 description_element = null; 511 } 512 description_elements = null; 513 } 514 else { 515 ///ystem.err.println("No Match!"); 516 } 517 filename_text = null; 518 filename_element = null; 519 } 520 filename_elements = null; 521 fileset_element = null; 522 } 523 fileset_elements = null; 524 directorymetadata_element = null; 525 } 526 catch (Exception error) { 527 Gatherer.self.printStackTrace(error); 528 } 529 // Cache the result, given that these external metadata.xmls are taken to be static at the time of reading (if you happen to be sourcing information from a opened collection that someone is working on, too bad. 530 if(filename != null) { 531 put(filename, metadatum); 532 } 533 else { 534 put(DIRECTORY_FILENAME, metadatum); 535 } 536 } 537 return metadatum; 538 } 539 540 /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */ 541 public boolean isValid() { 540 /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */ 541 public boolean isValid() { 542 542 // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata. 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 543 String doctype_name = base_document.getDoctype().getName(); 544 String root_name = base_document.getDocumentElement().getTagName(); 545 return ((doctype_name.equals("DirectoryMetadata") || doctype_name.equals("GreenstoneDirectoryMetadata")) && (root_name.equals("DirectoryMetadata") || root_name.equals("GreenstoneDirectoryMetadata"))); 546 } 547 548 /** Decode a string that was previously made Perl safe. 549 * @param safe The encoded <strong>String</strong> where dangerous characters have been escaped. 550 * @return A <strong>String</strong> with all the escaping removed. 551 */ 552 private String decode(String safe) { 553 String dangerous = safe.replaceAll("\\\\.","."); 554 return dangerous; 555 } 556 } 557 /** A simplistic version of metadata, with no live references. */ 558 private class BasicMetadata 559 implements Comparable { 560 public boolean accumulates; 561 /** The collection this metadata was extracted from. Important when attempting to map BasicMetadata to its Metadata incarnation. */ 562 public File collection; 563 /** The metadata element. */ 564 public String element = null; 565 /** The value. */ 566 public String value = null; 567 /** Constructor takes initial values for element and value. 568 * @param element The metadata element as a <strong>String</strong>. 569 * @param value The value as a <strong>String</strong>. 570 */ 571 public BasicMetadata(String element, String value, boolean accumulates) { 572 this.accumulates = accumulates; 573 this.element = element; 574 this.value = value; 575 } 576 577 public int compareTo(Object other) { 578 return toString().compareTo(other.toString()); 579 } 580 /** Compare two BasicMetadata objects for equality. 581 * @param object The other <strong>Object</strong>. 582 * @return <i>true</i> if this BasicMetadata matches the given object, <i>false</i> otherwise. 583 */ 584 public boolean equals(Object object) { 585 BasicMetadata other = (BasicMetadata) object; 586 if(collection != null) { 587 return (collection.equals(other.collection) && element.equals(other.element) && value.equals(other.value)); 588 } 589 return (element.equals(other.element) && value.equals(other.value)); 590 } 591 public String toString() { 592 return element + " = " + value; 593 } 594 } 595 596 /** This class provides a cache for the instances of parsed collect.cfg files and their associated data. Assures that the most recently cached CollectCFG will remain available. Older objects are maintained as soft references and are freed at the JVM implementations descretion, but are gareunteed to be garbage collected before an OutOfMemory exception is thrown. */ 597 private class CollectCFGCache 598 extends LinkedHashMap { 599 /** Retrieve the CollectCFG object that matches the given collection file path. 600 * @param collection_file The <strong>File</strong> that references the collection's directory. 601 * @return The <strong>CollectCFG</strong> that belongs to this collection, or <i>null</i> if no such file exists (so we probably aren't in a collection!). 602 */ 603 public CollectCFG get(File collect_cfg_file) { 604 604 ///ystem.err.println("Retrieve the collection configuration file at: " + collect_cfg_file); 605 605 CollectCFG collect_cfg = null; 606 606 // Attempt to load from cache. 607 607 SoftReference reference = (SoftReference) super.get(collect_cfg_file); 608 608 // If is doesn't exist, either because its never been loaded, or thats its cache reference has gone stale, attempt to load it again. 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 609 if(reference == null || (collect_cfg = (CollectCFG)reference.get()) == null) { 610 try { 611 collect_cfg = new CollectCFG(collect_cfg_file); 612 put(collect_cfg_file, new SoftReference(collect_cfg)); 613 } 614 catch(Exception error) { 615 Gatherer.printStackTrace(error); 616 collect_cfg = null; 617 } 618 } 619 return collect_cfg; 620 } 621 622 protected boolean removeEldestEntry(java.util.Map.Entry entry) { 623 return (size() > MAX_CFG_CACHE_SIZE); 624 } 625 } 626 627 /** The CollectCFG object encapsulates important metadata information extracted from a collect.cfg file, such as required metadata sets, and hfile associations. As the former are merged, their references are removed from this object, whereas the for the later references are replaced a representation of the hfile itself. */ 628 private class CollectCFG { 629 /** A list of the metadata sets associated with the collect.cfg file. */ 630 private ArrayList metadatasets = null; 631 /** A hash mapping from metadata element name to hierarchy file, or possibly hierarchy object. */ 632 private HashMap hfiles = null; 633 /** The token at the start of a classify command line within the collect.cfg. */ 634 static final private String CLASSIFY_COMMAND = "classify"; 635 /** The token at the start of a metadataset command line within the collect.cfg. */ 636 static final private String METADATASET_COMMAND = "metadataset"; 637 /** Constructor which takes a file assumed to be the location of a collect.cfg file belonging to a Greenstone Collection. 638 * @param file A <strong>File</strong> referencing a collect.cfg file. 639 */ 640 public CollectCFG(File file) 641 throws Exception { 642 642 ///atherer.println("Loading a new collection configuration file: " + file.getAbsolutePath()); 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 643 File etc_directory = file.getParentFile(); 644 hfiles = new HashMap(); 645 metadatasets = new ArrayList(); 646 FileReader reader = new FileReader(file); 647 BufferedReader in = new BufferedReader(reader); 648 String command = null; 649 while((command = in.readLine()) != null) { 650 CommandTokenizer tokenizer = new CommandTokenizer(command); 651 if(tokenizer.hasMoreTokens()) { 652 String token = tokenizer.nextToken().toLowerCase(); 653 if(token.equals(METADATASET_COMMAND)) { 654 String family_name = tokenizer.nextToken(); 655 String file_str = tokenizer.nextToken(); 656 if(file_str.startsWith("\"") && file_str.endsWith("\"") && !file_str.equals("\"\"")) { 657 file_str = file_str.substring(1, file_str.length() - 1); 658 } 659 // If the file str is -only- the filename then we add <col_dir>/metadata/ 660 File mds_file = null; 661 if(file_str.indexOf(File.separator) == -1) { 662 mds_file = new File(file.getParentFile().getParentFile(), File.separator + "metadata" + File.separator + file_str); 663 } 664 else { 665 mds_file = new File(file_str); 666 } 667 ///ystem.err.println("Attempting to file mds file at " + file.getAbsolutePath()); 668 if(mds_file.exists()) { 669 metadatasets.add(mds_file); 670 } 671 mds_file = null; 672 file_str = null; 673 family_name = null; 674 } 675 // Also look for any classify commands that include an hfile and element 676 else if(token.equals(CLASSIFY_COMMAND)) { 677 String hfile_name = null; 678 String element_name = null; 679 // Drop the classifier name 680 tokenizer.nextToken(); 681 while(tokenizer.hasMoreTokens()) { 682 token = tokenizer.nextToken().toLowerCase(); 683 if(token.equals("-hfile")) { 684 hfile_name = tokenizer.nextToken(); 685 } 686 else if(token.equals("-metadata")) { 687 element_name = tokenizer.nextToken(); 688 } 689 } 690 if(hfile_name != null && element_name != null) { 691 // If hfile_name has no path, append the etc directories one. Either way create a file reference 692 File hfile = null; 693 hfile_name = hfile_name.replace('\\', File.separatorChar); 694 hfile_name = hfile_name.replace('/', File.separatorChar); 695 if(hfile_name.indexOf(File.separator) == -1) { 696 hfile = new File(etc_directory, hfile_name); 697 } 698 else { 699 hfile = new File(hfile_name); 700 } 701 // Add to hfiles 702 ///atherer.println("Adding hfile reference: " + element_name + " -> " + hfile); 703 hfiles.put(element_name, hfile); 704 hfile = null; 705 } 706 element_name = null; 707 hfile_name = null; 708 } 709 tokenizer = null; 710 } 711 } 712 command = null; 713 in.close(); 714 reader.close(); 715 in = null; 716 reader = null; 717 717 // Now we search the etc directory for *.txt files which we attempt to parse as hfiles 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 718 File children[] = etc_directory.listFiles(); // We are sure there is at least one, collect.cfg 719 for(int i = 0; i < children.length; i++) { 720 // If this is a text file, extract the element name and process 721 String name = children[i].getName(); 722 if(children[i].isFile() && name.endsWith(".txt")) { 723 String element_name = name.substring(0, name.lastIndexOf(".")); 724 if(!hfiles.containsKey(element_name)) { 725 ///atherer.println("Adding hfile reference: " + element_name + " -> " + children[i]); 726 hfiles.put(element_name, children[i]); 727 } 728 element_name = null; 729 } 730 name = null; 731 } 732 children = null; 733 etc_directory = null; 734 file = null; 735 } 736 /** Attempts to retrieve the HFile object associated with a certain metadata element. This may have already been cached, or may need to be loaded. Then again it may not even be necessary. 737 * @param element The fully qualified name of a metadata element, as a <strong>String</strong>. 738 * @return The <strong>HFile</strong> associated with the given element, or <i>null</i> if its unnecessary. 739 * @see org.greenstone.gatherer.cdm.CommandTokenizer 740 */ 741 public HFile getHFile(String element) { 742 HFile result = null; 743 Object target = hfiles.get(element); 744 744 // If target is non-null 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 745 if(target != null) { 746 // If we haven't already load and parse the file. 747 if(target instanceof File) { 748 ///ystem.err.println("\nHFILE-MISS!! Loading " + target.toString()); 749 result = new HFile(); 750 try { 751 FileReader in_filereader = new FileReader((File)target); 752 //DecodeHTMLReader in_decodehtmlreader = new DecodeHTMLReader(in_filereader); 753 BufferedReader in = new BufferedReader(in_filereader); 754 String line = null; 755 while((line = in.readLine()) != null) { 756 CommandTokenizer tokenizer = new CommandTokenizer(line); 757 String alias = Utility.decodeGreenstone(tokenizer.nextToken()); 758 String index = tokenizer.nextToken(); 759 String value = Utility.decodeGreenstone(tokenizer.nextToken()); 760 ///ystem.err.println("Read " + index + ", " + alias + ", " + value); 761 if(alias.startsWith("\"") && alias.endsWith("\"") && !alias.equals("\"\"")) { 762 alias = alias.substring(1, alias.length() - 1); 763 } 764 if(value.startsWith("\"") && value.endsWith("\"") && !value.equals("\"\"")) { 765 value = value.substring(1, value.length() - 1); 766 } 767 result.add(index, alias, value); 768 value = null; 769 alias = null; 770 index = null; 771 tokenizer = null; 772 } 773 line = null; 774 in.close(); 775 in = null; 776 //in_decodehtmlreader = null; 777 in_filereader = null; 778 hfiles.put(element, result); 779 } 780 catch (Exception error) { 781 error.printStackTrace(); 782 hfiles.remove(element); 783 } 784 } 785 else { 786 ///ystem.err.print("HFILE-HIT!!! "); 787 result = (HFile) target; 788 } 789 } 790 790 // Else no hfile is needed for this element 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 791 target = null; 792 return result; 793 } 794 /** Retrieve the list of metadata sets associated with this collection. 795 * @return An <strong>ArrayList</strong> of metadata set Files. 796 */ 797 public ArrayList getMetadataSets() { 798 return metadatasets; 799 } 800 } 801 802 /** The HFile object provides a container for the mappings from indexes, of the form 1.1.1, to alias-value pairs. It also provides method to retrieving the alias and value for a certain element, remembering that values must be expressed in terms of their absolute subject heirarchy path. */ 803 private class HFile 804 extends HashMap { 805 /** Construct a new HFile object with no initial values. */ 806 public HFile() { 807 super(); 808 } 809 /** Add a new (index,(alias, value)) mapping. 810 * @param index The index of this mapping as a <strong>String</strong>. 811 * @param alias The alias of this mapping as a <strong>String</strong>. 812 * @param value And finally the value of this mapping as a, you guessed it, <strong>String</strong>. 813 */ 814 public void add(String index, String alias, String value) { 815 Entry entry = new Entry(index, alias, value); 816 816 ///ystem.err.println("Adding entry: " + index + " \"" + alias + "\" \"" + value + "\""); 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 817 put(index, entry); 818 put(alias, entry); 819 } 820 public String getAlias(String index) { 821 String alias = ""; 822 Entry entry = (Entry) get(index); 823 if(entry != null) { 824 alias = entry.alias; 825 } 826 entry = null; 827 return alias; 828 } 829 /** Retrieve the value associated with a certain index. This is harder than it first sounds as you must take into account the parent indexes of this one. 830 * @param index The index whose value you wish to calculate, as a <strong>String</strong>. 831 * @return The fully quantified path to the value that matches index, also as a <strong>String</strong>. Delimitiation between subject layers is denoted by the character '/' 832 */ 833 public String getValue(String index) { 834 834 ///ystem.err.println("Retrieve value for the alias/index: '" + index + "'"); 835 835 StringBuffer value = new StringBuffer(""); 836 836 // If index isn't the index, it must be the alias. Replace it with the index dammit. 837 838 839 840 841 842 843 844 837 Entry entry = null; 838 if(!Utility.isIndex(index)) { 839 ///ystem.err.println("\tThis is an alias."); 840 // Store this for later, as its exactly the same entry we'd get had we found the last component of a proper index. 841 entry = (Entry) get(index); 842 index = entry.index; 843 ///ystem.err.println("\tIndex is actually: " + index); 844 } 845 845 // Now build the hierarchy if necessary. 846 847 848 849 850 851 852 853 854 855 856 857 858 859 846 int dot_index = -1; 847 if((dot_index = index.indexOf(".")) != -1) { 848 ///ystem.err.println("\tHierarchy information required -->"); 849 value.append(getValue(index.substring(0, dot_index))); 850 value.append("\\"); 851 ///ystem.err.println("\t<-- Hierarchy information complete"); 852 } 853 if(entry == null) { 854 entry = (Entry) get(index); 855 } 856 if(entry != null) { 857 value.append(entry.value); 858 } 859 entry = null; 860 860 ///ystem.err.println("\tFinal value is: '" + value.toString() + "'\n"); 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 861 return value.toString(); 862 } 863 864 private class Entry { 865 public String alias = null; 866 public String index = null; 867 public String value = null; 868 public Entry(String index, String alias, String value) { 869 this.alias = alias; 870 this.index = index; 871 this.value = value; 872 } 873 } 874 } 875 876 private class MetadataXMLFileSearch { 877 public File file; 878 public String filename; 879 public MetadataXMLFileSearch(File file, String filename) { 880 this.file = file; 881 this.filename = filename; 882 } 883 } 884 884 }
Note:
See TracChangeset
for help on using the changeset viewer.