Changeset 7293
- Timestamp:
- 2004-05-07T13:10:18+12:00 (20 years ago)
- Location:
- trunk/gli/src/org/greenstone/gatherer/msm
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gli/src/org/greenstone/gatherer/msm/MetadataXMLFile.java
r7234 r7293 84 84 public void addMetadata(String filename, Metadata metadata, boolean force_accumulate) 85 85 { 86 // System.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));86 ///ystem.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory.")); 87 87 try { 88 88 // Retrieve the document element … … 94 94 // If there is no existing fileset, then create one 95 95 if (fileset_element == null) { 96 // System.err.println("Creating a new fileset.");97 96 fileset_element = base_document.createElement(FILESET_ELEMENT); 98 97 Element filename_element = base_document.createElement(FILENAME_ELEMENT); … … 113 112 // Otherwise we append the new fileset to gdm_element's children 114 113 else { 115 // System.err.println("New fileset for " + filename);114 ///ystem.err.println("New fileset for " + filename); 116 115 filename = Codec.transform(filename, Codec.TEXT_TO_REGEXP); 117 // System.err.println("After transform: " + filename);116 ///ystem.err.println("After transform: " + filename); 118 117 filename_text = base_document.createTextNode(filename); 119 118 gdm_element.appendChild(fileset_element); … … 128 127 String name = metadata.getElement().getName(); 129 128 // If this is extracted metadata, we use a special element name that won't be recognized by greenstone 129 String current_metadata_type = StaticStrings.METADATA_ELEMENT; 130 130 if(name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) { 131 metadata_element = base_document.createElement(ALL_METADATA_TYPES[1]);131 current_metadata_type = StaticStrings.EXTRACTED_METADATA_ELEMENT; 132 132 name = name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1); 133 133 } 134 else { 135 metadata_element = base_document.createElement(ALL_METADATA_TYPES[0]); 136 } 134 135 metadata_element = base_document.createElement(current_metadata_type); 137 136 metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name); 138 137 139 138 // To determine if this 
metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset. 139 // also we are checking for the existence of exactly the same metadata cos sometimes we can be asked to add the same metadata twice. 140 140 boolean will_accumulate = false; 141 NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 142 for(int k = 0; !will_accumulate && k < sibling_description_elements.getLength(); k++) { 143 Element sibling_description_element = (Element) sibling_description_elements.item(k); 144 // We have to do this for each type of metadata 145 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 146 NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 147 for(int l = 0; !will_accumulate && l < sibling_metadata_elements.getLength(); l++) { 148 Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l); 149 // It appears that its possible that we can be asked to add the same metadata twice (especially after a copy action is cancelled then repeated). So we check if we have been asked to add exactly the same value twice. 150 if(sibling_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE))) { 151 // Check the values and return if they are the same. 
152 if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sibling_metadata_element))) { 153 return; 154 } 155 will_accumulate = true; 156 } 157 sibling_metadata_element = null; 158 } 159 sibling_metadata_elements = null; 160 } 161 sibling_description_element = null; 162 } 163 sibling_description_elements = null; 141 // only look through Metadata or XMetadata depending on which type we are trying to add 142 NodeList sibling_metadata_elements = fileset_element.getElementsByTagName(current_metadata_type); 143 for (int i=0; i<sibling_metadata_elements.getLength(); i++) { 144 Element sib_meta = (Element) sibling_metadata_elements.item(i); 145 if(name.equals(sib_meta.getAttribute(StaticStrings.NAME_ATTRIBUTE))) { 146 // found one with the same name - the new metadata will accumulate 147 will_accumulate = true; 148 // check for the same value 149 // *** TODO this doesn't work for hierarchical metadata 150 System.err.println("sibling value="+MSMUtils.getValue(sib_meta)+", this value="+metadata.getAbsoluteValue()); 151 if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sib_meta))) { 152 System.err.println("matches!!!!!"); 153 return; 154 } 155 156 } 157 sib_meta = null; 158 } 159 sibling_metadata_elements = null; 160 164 161 if(will_accumulate || force_accumulate) { //mode.equals(ACCUMULATE)) { 165 162 metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE); 166 163 } 167 164 // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string 168 GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement()); 169 String node_value = null; 170 if(model != null && model.isHierarchy()) { 165 // getAbsoluteValue now does return the full path 166 String node_value = metadata.getAbsoluteValue(); 167 
//GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement()); 168 //String node_value = null; 169 //if(model != null && model.isHierarchy()) { 171 170 //node_value = /odec.transform(metadata.getValueNode().getFullPath(false), /odec.TEXT_TO_DOM); 172 node_value = metadata.getValueNode().getFullPath(false);173 }174 else {175 176 }171 //node_value = metadata.getValueNode().getFullPath(false); 172 //} 173 //else { 174 //node_value = metadata.getAbsoluteValue(); 175 // } 177 176 ///ystem.err.println("Creating node in MetadataXMLFile: '" + node_value + "'"); 178 177 metadata_element.appendChild(base_document.createTextNode(node_value)); … … 192 191 } 193 192 194 public int countMetadata() { 195 int count = 0; 193 /** this is used to 'purge' the metadata - I've taken the purge code out of getMetadata and put it in to here, cos its only called from one place and we dont want to retrieve the metadata, just update it */ 194 public void cleanUpMetadataRefs() { 195 //Gatherer.println("clean up metadata refs!"); 196 197 String file_relative_path = ""; 196 198 try { 197 199 // Retrieve the document element. 198 200 Element directorymetadata_element = base_document.getDocumentElement(); 199 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching. 
200 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 201 for(int i = 0; i < fileset_elements.getLength(); i++) { 202 Element fileset_element = (Element) fileset_elements.item(i); 203 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 204 for(int k = 0; k < description_elements.getLength(); k++) { 205 Element description_element = (Element) description_elements.item(k); 206 // We have to do this for each type of metadata 207 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 208 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 209 count = count + metadata_elements.getLength(); 210 metadata_elements = null; 211 } 212 description_element = null; 213 } 214 description_elements = null; 215 fileset_element = null; 216 } 217 fileset_elements = null; 218 directorymetadata_element = null; 219 } 220 catch (Exception error) { 221 Gatherer.printStackTrace(error); 222 } 223 return count; 201 // Iterate through all the metadata 202 // We have to do this for each type of metadata - do we need it for extracted metadata - will this only affect hierarchical metadata in which case only need to do normal metadata ? 
203 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 204 NodeList metadata_elements = directorymetadata_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 205 for(int l = 0; l < metadata_elements.getLength(); l++) { 206 Element metadata_element = (Element) metadata_elements.item(l); 207 String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE); 208 String raw_value = MSMUtils.getValue(metadata_element); 209 // 210 // ***** LEGACY SUPPORT ***** 211 // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\' 212 if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) { 213 Gatherer.println("Detected Legacy Path: " + raw_value); 214 raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR); 215 MSMUtils.setValue(metadata_element, raw_value); 216 } 217 218 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element); 219 if (element != null) { 220 GValueNode value = Metadata.getDefaultValueNode(element, raw_value); 221 String current_value = value.getFullPath(false); 222 if(!raw_value.equals(current_value)) { 223 // set the new value 224 MSMUtils.setValue(metadata_element, current_value); 225 } 226 value = null; 227 current_value = null; 228 } 229 element = null; 230 metadata_element = null; 231 raw_element = null; 232 raw_value = null; 233 } // for each metadata element 234 metadata_elements = null; 235 } // for each metadata type 236 } catch (Exception error) { 237 Gatherer.self.printStackTrace(error); 238 } 224 239 } 225 240 … … 263 278 } 264 279 265 /** Get all of the metadata, including directory level, associated with this file. */ 280 /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. */ 281 // !! Michael has written a much nicer version of this function !! 
282 // Kath has cleaned up this version a bit 266 283 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) { 267 return getMetadata(filename, remove, metadatum_so_far, file, append_folder_level, false);268 }269 /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. Finally if purge is set retrieve every piece of metadata in this file. */270 // !! Michael has written a much nicer version of this function !!271 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level, boolean purge) {272 284 Gatherer.println("Get metadata for " + filename); 273 Gatherer.println("remove = " + remove + ", metadata_so_far = " + (metadatum_so_far != null ? String.valueOf(metadatum_so_far.size()) : "null") + ", file = " + file + ", append_folder_level = " + append_folder_level + ", purge = " + purge);285 Gatherer.println("remove = " + remove + ", metadata_so_far = " + (metadatum_so_far != null ? 
String.valueOf(metadatum_so_far.size()) : "null") + ", file = " + file + ", append_folder_level = " + append_folder_level); 274 286 275 287 // Determine the file's path relative to the location of the metadata.xml file … … 294 306 for(int i = 0; i < fileset_elements.getLength(); i++) { 295 307 Element fileset_element = (Element) fileset_elements.item(i); 308 boolean fileset_matches = false; 309 // look through the filename elements of this and see if we have a match 296 310 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT); 311 String filename_text = ""; 297 312 for(int j = 0; j < filename_elements.getLength(); j++) { 298 313 Element filename_element = (Element) filename_elements.item(j); 299 String filename_text = MSMUtils.getValue(filename_element); 300 if((filename != null && (filename.matches(filename_text) || (append_folder_level && filename.indexOf(File.separator) != -1 && filename_text.equals(filename.substring(0, filename.indexOf(File.separator)))))) || ((filename == null || append_folder_level) && filename_text.equals(DIRECTORY_FILENAME)) || purge) { 301 // If they match add all of the metadata found in the Description child element, remembering to abide by desired mode (accumulate vs. overwrite). 
302 // Normal metadata 303 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 304 for(int k = 0; k < description_elements.getLength(); k++) { 305 Element description_element = (Element) description_elements.item(k); 306 // We have to do this for each type of metadata 307 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 308 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 309 for(int l = 0; l < metadata_elements.getLength(); l++) { 310 Element metadata_element = (Element) metadata_elements.item(l); 311 String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE); 312 //String language = metadata_element.getAttribute("language"); 313 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE); 314 String raw_value = MSMUtils.getValue(metadata_element); 315 // 316 //raw_value = Codec.transform(raw_value, Codec.DOM_TO_); 317 ///ystem.err.println("Retrieved raw value: " + raw_value); 318 // ***** LEGACY SUPPORT ***** 319 // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\' 320 if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) { 321 ///ystem.err.println("Blarg"); 322 Gatherer.println("Detected Legacy Path: " + raw_value); 323 raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR); 324 Gatherer.println("Updated Path To: " + raw_value); 325 MSMUtils.setValue(metadata_element, raw_value); 314 filename_text = MSMUtils.getValue(filename_element); 315 if((filename != null && (filename.matches(filename_text) || (append_folder_level && filename.indexOf(File.separator) != -1 && filename_text.equals(filename.substring(0, filename.indexOf(File.separator)))))) || ((filename == null || append_folder_level) && filename_text.equals(DIRECTORY_FILENAME))) { 316 fileset_matches = true; 317 filename_element = null; 318 break; 319 } 320 filename_element = null; 321 } 322 323 if 
(!fileset_matches) { 324 // go to teh next fileset 325 fileset_element = null; 326 continue; 327 } 328 // If this fileset matches add all of the metadata found in the fileset, remembering to abide by desired mode (accumulate vs. overwrite). 329 // We have to do this for each type of metadata 330 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 331 NodeList metadata_elements = fileset_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 332 for(int l = 0; l < metadata_elements.getLength(); l++) { 333 Element metadata_element = (Element) metadata_elements.item(l); 334 String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE); 335 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE); 336 String raw_value = MSMUtils.getValue(metadata_element); 337 // ***** LEGACY SUPPORT ***** 338 // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\' 339 if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) { 340 Gatherer.println("Detected Legacy Path: " + raw_value); 341 raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR); 342 Gatherer.println("Updated Path To: " + raw_value); 343 MSMUtils.setValue(metadata_element, raw_value); 344 } 345 // ************************** 346 // Using the element string and value, retrieve a matching Metadata object from the cache 347 Metadata metadata = null; 348 // If this element has hierarchy values then we must ensure the raw value is a full path, not an index. 349 // Try to retrieve an already comstructed piece of metadata from file - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable 350 if(MetadataXMLFileManager.metadata_cache.contains(raw_element, raw_value)) { 351 ///ystem.err.println("HIT! 
Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n"); 352 metadata = (Metadata) MetadataXMLFileManager.metadata_cache.get(raw_element, raw_value); 353 } 354 else { 355 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element); 356 if (element != null) { 357 GValueNode value = Metadata.getDefaultValueNode(element, raw_value); 358 ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n"); 359 metadata = new Metadata(element, value); 360 MetadataXMLFileManager.metadata_cache.put(raw_element, raw_value, metadata); 361 362 ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n"); 363 value = null; 364 element = null; 365 } 366 } 367 368 // Determine whether this metadata is file or folder level 369 if (metadata != null) { 370 // System.err.println("File relative path: " + file_relative_path + " Filename text: " + filename_text); 371 // Direct match to regular expression 372 if (file_relative_path.matches(filename_text)) { 373 boolean is_folder_level = filename_text.equals(".*") && !file_relative_path.equals(""); 374 metadata.setFile(file); 375 metadata.setFileLevel(!is_folder_level); 376 } 377 // Indirect match to regular expression (always folder level) 378 else if (file_relative_path.startsWith(filename_text + File.separator)) { 379 metadata.setFile(new File(file, filename_text)); 380 metadata.setFileLevel(false); 381 } 382 383 // If mode is overwrite, then remove any previous values for this metadata element. 
384 if(mode.equals("accumulate")) { 385 metadata.setAccumulate(true); 386 } 387 else { 388 metadata.setAccumulate(false); 389 ///ystem.err.println("Metadata overwrites: " + metadata); 390 for(int m = metadatum.size() - 1; m >= 0; m--) { 391 Metadata old_metadata = (Metadata) metadatum.get(m); 392 if(old_metadata.getElement().equals(metadata.getElement())) { 393 metadatum.remove(m); 394 ///ystem.err.println("Removing overridden metadata: " + old_metadata); 326 395 } 327 // ************************** 328 // Using the element string and value, retrieve a matching Metadata object from the cache 329 Metadata metadata = null; 330 // If this element has hierarchy values then we must ensure the raw value is a full path, not an index. 331 // Try to retrieve an already comstructed piece of metadata from file - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable 332 if(MetadataXMLFileManager.metadata_cache.contains(raw_element, raw_value) && !purge) { 333 ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n"); 334 metadata = (Metadata) MetadataXMLFileManager.metadata_cache.get(raw_element, raw_value); 335 } 336 else { 337 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element); 338 if (element != null) { 339 GValueNode value = Metadata.getDefaultValueNode(element, raw_value); 340 ///ystem.err.println("Miss. 
Create new metadata: " + raw_element + " -> " + raw_value + "\n"); 341 metadata = new Metadata(element, value); 342 if(!purge) { 343 MetadataXMLFileManager.metadata_cache.put(raw_element, raw_value, metadata); 344 } 345 ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n"); 346 value = null; 347 element = null; 348 } 349 } 350 351 // Determine whether this metadata is file or folder level 352 if (metadata != null) { 353 // System.err.println("File relative path: " + file_relative_path + " Filename text: " + filename_text); 354 // Direct match to regular expression 355 if (file_relative_path.matches(filename_text)) { 356 boolean is_folder_level = filename_text.equals(".*") && !file_relative_path.equals(""); 357 metadata.setFile(file); 358 metadata.setFileLevel(!is_folder_level); 359 } 360 // Indirect match to regular expression (always folder level) 361 else if (file_relative_path.startsWith(filename_text + File.separator)) { 362 metadata.setFile(new File(file, filename_text)); 363 metadata.setFileLevel(false); 364 } 365 366 // If mode is overwrite, then remove any previous values for this metadata element. 367 if(mode.equals("accumulate")) { 368 metadata.setAccumulate(true); 369 } 370 else { 371 metadata.setAccumulate(false); 372 ///ystem.err.println("Metadata overwrites: " + metadata); 373 for(int m = metadatum.size() - 1; m >= 0; m--) { 374 Metadata old_metadata = (Metadata) metadatum.get(m); 375 if(old_metadata.getElement().equals(metadata.getElement())) { 376 metadatum.remove(m); 377 ///ystem.err.println("Removing overridden metadata: " + old_metadata); 378 } 379 old_metadata = null; 380 } 381 } 382 mode = null; 383 // Add the completed metadata and clean up 384 ///ystem.err.println("Adding metadata: " + metadata); 385 metadatum.add(metadata); 386 // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. 
This happens whenever hierarchy information is involved (indexes rapidly become obsolete). 387 // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level. 388 ///atherer.println("Have we been asked to remove the metadata: " + metadata); 389 ///atherer.println("Given:"); 390 ///atherer.println("\tremove = " + remove); 391 ///atherer.println("\tfilename = " + filename); 392 ///atherer.println("\tfilename_text = " + filename_text + "?"); 393 if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) { 394 ///atherer.println("Yes! Queuing for Removal."); 395 queued_for_removal.add(metadata_element); 396 } 397 else { 398 ///atherer.println("No. Updating."); 399 String current_value = metadata.getValueNode().getFullPath(false); 400 ///ystem.err.println("Checking the current mdv path: " + current_value); 401 ///ystem.err.println("Against whats in the metadata file: " + raw_value); 402 if(!raw_value.equals(current_value)) { 403 // Remove old text 404 while(metadata_element.hasChildNodes()) { 405 metadata_element.removeChild(metadata_element.getFirstChild()); 406 } 407 // Add new. 
408 metadata_element.appendChild(base_document.createTextNode(current_value)); 409 } 410 } 411 } 412 metadata = null; 413 raw_value = null; 414 raw_element = null; 415 metadata_element = null; 396 old_metadata = null; 416 397 } 417 metadata_elements = null; 418 } 419 420 // Now we remove any elements that have been queued for deletion 421 for(int a = 0; queued_for_removal != null && a < queued_for_removal.size(); a++) { 422 Element metadata_element = (Element) queued_for_removal.get(a); 423 description_element.removeChild(metadata_element); 424 up_to_date = false; 425 } 426 queued_for_removal.clear(); 427 428 // If the description_element no longer has any children remove it 429 NodeList metadata_elements = description_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT); 430 NodeList extracted_elements = description_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT); 431 if(metadata_elements.getLength() == 0 && extracted_elements.getLength() == 0) { 432 fileset_element.removeChild(description_element); 433 up_to_date = false; 434 } 435 description_element = null; 398 } 399 mode = null; 400 // Add the completed metadata and clean up 401 ///ystem.err.println("Adding metadata: " + metadata); 402 metadatum.add(metadata); 403 // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete). 404 // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level. 
405 ///atherer.println("Have we been asked to remove the metadata: " + metadata); 406 ///atherer.println("Given:"); 407 ///atherer.println("\tremove = " + remove); 408 ///atherer.println("\tfilename = " + filename); 409 ///atherer.println("\tfilename_text = " + filename_text + "?"); 410 if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) { 411 ///atherer.println("Yes! Queuing for Removal."); 412 queued_for_removal.add(metadata_element); 413 } 414 else { 415 ///atherer.println("No. Updating."); 416 String current_value = metadata.getValueNode().getFullPath(false); 417 ///ystem.err.println("Checking the current mdv path: " + current_value); 418 ///ystem.err.println("Against whats in the metadata file: " + raw_value); 419 if(!raw_value.equals(current_value)) { 420 MSMUtils.setValue(metadata_element, current_value); 421 } 422 } 436 423 } 437 description_elements = null; 438 } 439 filename_text = null; 440 filename_element = null; 441 } 442 // If the file set no longer has any description entries, remove it entirely 443 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 444 if(description_elements.getLength() == 0) { 445 directorymetadata_element.removeChild(fileset_element); 446 up_to_date = false; 447 } 448 description_elements = null; 449 filename_elements = null; 424 metadata = null; 425 raw_value = null; 426 raw_element = null; 427 metadata_element = null; 428 } // for all metadata elements 429 metadata_elements = null; 430 } // for all metadata types 431 432 // Now we remove any elements that have been queued for deletion 433 for(int a = 0; queued_for_removal != null && a < queued_for_removal.size(); a++) { 434 Element metadata_element = (Element) queued_for_removal.get(a); 435 Element parent = (Element) metadata_element.getParentNode(); 436 parent.removeChild(metadata_element); 437 438 up_to_date 
= false; 439 } 440 queued_for_removal.clear(); 441 442 // If the fileset no longer has any metadata remove it 443 NodeList metadata_elements = fileset_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT); 444 if (metadata_elements.getLength()==0) { 445 metadata_elements = fileset_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT); 446 if (metadata_elements.getLength()==0) { 447 directorymetadata_element.removeChild(fileset_element); 448 up_to_date = false; 449 } 450 } 451 metadata_elements = null; 450 452 fileset_element = null; 451 } 453 filename_text = null; 454 } // for each fileset element 455 452 456 fileset_elements = null; 453 457 directorymetadata_element = null; … … 461 465 } 462 466 467 /** returns true if the document has at least one Metadata or XMetadata element */ 468 public boolean hasMetadata() { 469 boolean has_meta = true; 470 try { 471 // Retrieve the document element. 472 Element directory_metadata_element = base_document.getDocumentElement(); 473 NodeList metadata_nodes = directory_metadata_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT); 474 if (metadata_nodes.getLength()==0) { 475 // try extracted metadata 476 metadata_nodes = directory_metadata_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT); 477 if (metadata_nodes.getLength()==0) { 478 has_meta = false; 479 } 480 } 481 directory_metadata_element=null; 482 metadata_nodes=null; 483 } 484 catch (Exception error) { 485 Gatherer.printStackTrace(error); 486 } 487 return has_meta; 488 } 489 463 490 /** Determine if this document has been saved recently, and thus xml file version is up to date. */ 464 491 public boolean isUpToDate() { … … 504 531 } 505 532 506 /** Remove the given directory level metadata from this document. All directory level metadata is available under the FileSet with filename '.*'. 
There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */533 /** Remove the given metadata from this document.If filename is null, then removes directory level metadata, otherwise just removes it from the specified file. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */ 507 534 public void removeMetadata(String filename, Metadata metadata) { 508 535 Gatherer.println("Remove metadata: " + metadata + "\nFrom filename: " + filename); … … 512 539 boolean make_next_metadata_element_overwrite = false; 513 540 boolean remove_fileset = false; 541 // is this extracted or normal metadata? 542 String removing_metadata_name = metadata.getElement().getName(); 543 // If this is extracted metadata, we use a special element name that won't be recognized by greenstone 544 String current_metadata_type = StaticStrings.METADATA_ELEMENT; 545 if(removing_metadata_name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) { 546 current_metadata_type = StaticStrings.EXTRACTED_METADATA_ELEMENT; 547 removing_metadata_name = removing_metadata_name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1); 548 } 549 514 550 // Retrieve the document element. 515 551 Element directorymetadata_element = base_document.getDocumentElement(); 516 // Iterate through the filesets looking for the directory levelone.552 // Iterate through the filesets looking for the appropriate one. 
517 553 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 518 554 for(int i = 0; !found && i < fileset_elements.getLength(); i++) { … … 523 559 String filename_text = MSMUtils.getValue(filename_element); 524 560 if((filename != null && filename.matches(filename_text) && !filename.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) { 525 // Retrieve the Metadata Element for this fileset, and iterate through them looking for the one which we are to remove. 526 NodeList description_elements = fileset_element.getElementsByTagName("Description"); 527 for(int k = 0; !found && k < description_elements.getLength(); k++) { 528 Element description_element = (Element) description_elements.item(k); 529 // We have to do this for each type of metadata 530 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 531 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 532 for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) { 533 Element metadata_element = (Element) metadata_elements.item(l); 534 String element = metadata_element.getAttribute("name"); 535 String value = MSMUtils.getValue(metadata_element); 536 // See if this is the metadata we wish to remove 537 if(element.equals(metadata.getElement().getName())) { 538 if(value.equals(metadata.getValueNode().getFullPath(false))) { 539 // Remove it 540 ///ystem.err.println("Remove " + element + "-" + value); 541 description_element.removeChild(metadata_element); 542 found = true; 543 // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now. 
544 if(first_metadata_element_found && !metadata.accumulates()) { 545 ///ystem.err.println("First of this element found!"); 546 make_next_metadata_element_overwrite = true; 547 } 548 } 549 // If this was the first metadata we've found with the element of the one to be removed set first found to false. 550 else if(first_metadata_element_found) { 551 ///ystem.err.println("Found a matching element: " + element + "=" + value); 552 first_metadata_element_found = false; 553 } 554 // Otherwise we should make this metadata overwrite as requested. 555 else if(make_next_metadata_element_overwrite) { 556 ///ystem.err.println("Changing to overwrite: " + element + "=" + value); 557 metadata_element.setAttribute(MODE_ATTRIBUTE, ""); 558 } 561 // Retrieve the Metadata Elements for this fileset, and iterate through them looking for the one which we are to remove. 562 NodeList metadata_elements = fileset_element.getElementsByTagName(current_metadata_type); 563 for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) { 564 Element metadata_element = (Element) metadata_elements.item(l); 565 String element = metadata_element.getAttribute("name"); 566 String value = MSMUtils.getValue(metadata_element); 567 // See if this is the metadata we wish to remove 568 if(element.equals(removing_metadata_name)) { 569 if(value.equals(metadata.getValueNode().getFullPath(false))) { 570 // Remove it 571 ///ystem.err.println("Remove " + element + "-" + value); 572 Element parent_elem = (Element)metadata_element.getParentNode(); 573 parent_elem.removeChild(metadata_element); 574 575 //description_element.removeChild(metadata_element); 576 found = true; 577 // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now. 
578 if(first_metadata_element_found && !metadata.accumulates()) { 579 ///ystem.err.println("First of this element found!"); 580 make_next_metadata_element_overwrite = true; 559 581 } 560 value = null;561 element = null;562 metadata_element = null;563 582 } 564 NodeList normal_metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[0]); 565 NodeList extracted_metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[1]); 566 // If we found it, removed it, and now the description tag has no children, mark the fileset for removal 567 if(normal_metadata_elements.getLength() == 0 && extracted_metadata_elements.getLength() == 0) { 568 remove_fileset = true; 583 // If this was the first metadata we've found with the element of the one to be removed set first found to false. 584 else if(first_metadata_element_found) { 585 ///ystem.err.println("Found a matching element: " + element + "=" + value); 586 first_metadata_element_found = false; 569 587 } 570 extracted_metadata_elements = null; 571 normal_metadata_elements = null; 572 metadata_elements = null; 573 } 574 description_element = null; 588 // Otherwise we should make this metadata overwrite as requested. 
589 else if(make_next_metadata_element_overwrite) { 590 ///ystem.err.println("Changing to overwrite: " + element + "=" + value); 591 metadata_element.setAttribute(MODE_ATTRIBUTE, ""); 592 } 593 } 594 value = null; 595 element = null; 596 metadata_element = null; 597 } // for each metadata 598 metadata_elements = null; 599 } // if the filename matches 600 601 if (found) { 602 // if we found an element and removed it, we now want to check whether the fileset is empty or not 603 NodeList metadata_elements = fileset_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT); 604 if (metadata_elements.getLength() ==0) { 605 metadata_elements = fileset_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT); 606 if (metadata_elements.getLength() ==0) { 607 // remove the fileset 608 directorymetadata_element.removeChild(fileset_element); 609 } 575 610 } 576 description_elements = null;611 metadata_elements = null; 577 612 } 578 613 filename_text = null; 579 614 filename_element = null; 580 } 615 } // for each filename element 581 616 filename_elements = null; 582 if(found && remove_fileset) {583 directorymetadata_element.removeChild(fileset_element);584 }585 617 fileset_element = null; 586 } 618 } // for each fileset element 587 619 fileset_elements = null; 588 620 directorymetadata_element = null; … … 593 625 } 594 626 } 595 627 596 628 /** Change the up to date flag. 597 629 * @param up_to_date true if the document on the filesystem is the same as the one in memory, false otherwise -
trunk/gli/src/org/greenstone/gatherer/msm/MetadataXMLFileManager.java
r7114 r7293 370 370 */ 371 371 public synchronized void metadataChanged(MSMEvent event) { 372 ///ystem.err.println("Recieved Event: " + event.toString());373 372 File file = event.getFile(); 374 373 if(file == null) { … … 446 445 File file = (File) iterator.next(); 447 446 MetadataXMLFile document = (MetadataXMLFile) get(file); 448 if(!document.isUpToDate()) { 449 //ystem.err.println("Saving: " + file.getAbsolutePath()); 450 // First purge any old references. 451 document.getMetadata(null, false, null, null, false, true); 452 // If there is no metadata in this document then don't write out a file. In fact delete any file that already exists. 453 int count = document.countMetadata(); 454 if(count > 0) { 455 // Now write the xml 456 Utility.export(document.getDocument(), file); 457 document.setUpToDate(true); 458 } 459 else if(file.exists()) { 460 file.delete(); 461 } 462 } 463 } 464 } 447 save(file, document); 448 } 449 } 450 465 451 /** Used to cause the document associated with a particular file to write the latest copy of itself to disk. */ 466 452 public void save(FileNode node) { … … 468 454 if(file != null && file.isFile()) { 469 455 MetadataXMLFile document = getDocument(file); 470 File xml_file; 471 if(file.isFile()) { 472 xml_file = new File(file.getParentFile(), "metadata.xml"); 473 } 474 else { 475 xml_file = new File(file, "metadata.xml"); 476 } 477 if(document != null && !document.isUpToDate()) { 478 // First purge any old references. 
479 document.getMetadata(null, false, null, null, true); 480 // Now write the xml 481 Utility.export(document.getDocument(), xml_file); 482 document.setUpToDate(true); 483 } 484 xml_file = null; 456 if (document != null && !document.isUpToDate()) { 457 File xml_file; 458 if(file.isFile()) { 459 xml_file = new File(file.getParentFile(), "metadata.xml"); 460 } 461 else { 462 xml_file = new File(file, "metadata.xml"); 463 } 464 save(xml_file, document); 465 xml_file = null; 466 } 485 467 document = null; 486 468 } 487 469 file = null; 488 470 } 471 489 472 490 473 /** Write out the latest copy of a certain document. */ 491 474 public void save(File file, MetadataXMLFile document) { 492 if(!document.isUpToDate()) { 493 // First purge any old references. 494 document.getMetadata(null, false, null, null, true); 495 // Now write the xml 496 Utility.export(document.getDocument(), file); 497 document.setUpToDate(true); 498 } 475 if(!document.isUpToDate()) { 476 // First purge any old references. 477 document.cleanUpMetadataRefs(); 478 // If there is no metadata in this document then don't write out a file. In fact delete any file that already exists. 
479 boolean has_metadata = document.hasMetadata(); 480 if (has_metadata) { 481 // Now write the xml 482 Utility.export(document.getDocument(), file); 483 } 484 else if(file.exists()) { 485 file.delete(); 486 } 487 document.setUpToDate(true); 488 } 499 489 } 500 490 … … 523 513 private Metadata checkCache(Metadata metadata) { 524 514 if(metadata != null) { 525 ///ystem.err.println("Search for " + metadata.toString()); 526 if(metadata_cache.contains(metadata.getElement(), metadata.getValueNode())) { 527 metadata = (Metadata) metadata_cache.get(metadata.getElement(), metadata.getValueNode()); 515 //if(metadata_cache.contains(metadata.getElement(), metadata.getValueNode())) { 516 // metadata = (Metadata) metadata_cache.get(metadata.getElement(), metadata.getValueNode()); 517 // System.err.println("cache contains teh value"); 518 // } 519 // the element name was used as the key 520 if(metadata_cache.contains(metadata.getElement().getElement().getAttribute("name"), metadata.getValue())) { 521 metadata = (Metadata) metadata_cache.get(metadata.getElement().getElement().getAttribute("name"), metadata.getValue()); 528 522 } 529 523 }
Note: See TracChangeset for help on using the changeset viewer.