Changeset 6051 for trunk/gli/src/org/greenstone/gatherer/msm
- Timestamp:
- 2003-11-30T21:43:23+13:00 (21 years ago)
- Location:
- trunk/gli/src/org/greenstone/gatherer/msm
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gli/src/org/greenstone/gatherer/msm/ElementWrapper.java
r6043 r6051 50 50 import org.w3c.dom.*; 51 51 52 /** This class provides a convience wrapper around a DOM model Element to allow Components such as the MetadataTable to display this information properly. 52 /** This class provides a convience wrapper around a DOM model Element to allow Components such as the MetadataTable to display this information properly. 53 53 * @author John Thompson 54 54 * @version 2.3 55 55 */ 56 public class ElementWrapper 56 public class ElementWrapper 57 57 implements Comparable { 58 58 /** The DOM element this wrapper is wrapped around. */ … … 62 62 /** Constructor for elements with no namespace necessary. 63 63 * @param element The DOM <strong>Element</strong> this is to be based on. 64 */ 64 */ 65 65 public ElementWrapper(Element element) { 66 66 this.element = element; … … 69 69 this.namespace = parent.getAttribute("namespace"); 70 70 } 71 71 72 72 } 73 73 … … 94 94 return toString().compareTo(object.toString()); 95 95 } 96 /** Decrement the number of occurances of this metadata element. 96 /** Decrement the number of occurances of this metadata element. 97 97 * @see org.greenstone.gatherer.msm.MSMUtils 98 98 */ … … 179 179 } 180 180 181 /** Increment the number of occurances of this metadata element. 181 /** Increment the number of occurances of this metadata element. 182 182 * @see org.greenstone.gatherer.msm.MSMUtils 183 183 */ … … 185 185 MSMUtils.setOccurance(element, 1); 186 186 } 187 187 188 188 public boolean isHierarchy() { 189 189 return element.getAttribute(StaticStrings.HIERARCHY_ATTRIBUTE).equalsIgnoreCase(StaticStrings.TRUE_STR); … … 227 227 228 228 // Return just the element name, unless the element identifier differs 229 if (element_name_no_namespace.equals(element_identifier)) {229 //if (element_name_no_namespace.equals(element_identifier)) { 230 230 return element_name; 231 }232 else {233 return element_name + " (" + element_identifier + ")";234 }231 //} 232 //else { 233 // return element_name + " (" + element_identifier + ")"; 234 //} 235 235 } 236 236 } -
trunk/gli/src/org/greenstone/gatherer/msm/GDMDocument.java
r6029 r6051 45 45 */ 46 46 public class GDMDocument { 47 /** Record if the document this object is based on is up to date. */ 48 private boolean up_to_date = true; 49 /** The document this class sources its data from. */ 50 private Document base_document; 51 static final private String ACCUMULATE = "accumulate"; 52 /** The pattern to match when searching for directory level assignments. */ 53 static final private String DIRECTORY_FILENAME = ".*"; 54 static final private String DESCRIPTION_ELEMENT = "Description"; 55 static final private String FILENAME_ELEMENT = "FileName"; 56 static final private String FILESET_ELEMENT = "FileSet"; 57 static final private String HVALUE_ATTRIBUTE = "hvalue"; 58 static final private String MODE_ATTRIBUTE = "mode"; 59 static final private String OVERWRITE = "overwrite"; 60 61 /** Constructor which creates a brand new metadata.xml document. */ 62 public GDMDocument() { 47 /** Record if the document this object is based on is up to date. */ 48 private boolean up_to_date = true; 49 /** The document this class sources its data from. */ 50 private Document base_document; 51 static final private String ACCUMULATE = "accumulate"; 52 /** The pattern to match when searching for directory level assignments. */ 53 static final private String DIRECTORY_FILENAME = ".*"; 54 static final private String DESCRIPTION_ELEMENT = "Description"; 55 static final private String FILENAME_ELEMENT = "FileName"; 56 static final private String FILESET_ELEMENT = "FileSet"; 57 static final private String HVALUE_ATTRIBUTE = "hvalue"; 58 static final private String MODE_ATTRIBUTE = "mode"; 59 static final private String OVERWRITE = "overwrite"; 60 static final private String[] ALL_METADATA_TYPES = {StaticStrings.METADATA_ELEMENT, StaticStrings.EXTRACTED_METADATA_ELEMENT}; 61 62 /** Constructor which creates a brand new metadata.xml document. */ 63 public GDMDocument() { 63 64 // Create new document. We do this by loading a copy of the template. */ 64 65 this.base_document = Utility.parse(Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE, true); 65 66 67 68 66 } 67 68 /** Constructor which parses an existing metadata.xml document. */ 69 public GDMDocument(File file) { 69 70 try { 70 71 this.base_document = Utility.parse(file.getAbsolutePath(), false); 71 72 } 72 73 catch (Exception error) { 73 74 } 75 76 77 78 74 // Poorly formed, or completely invalid metadata.xml file! 75 } 76 } 77 78 /** Constructor which wraps around an existing metadata.xml document. */ 79 public GDMDocument(Document base_document) { 79 80 this.base_document = base_document; 80 81 82 83 81 } 82 83 /** Add this metadata to the named file. There is one tricky thing to consider. Whenever a metadata entry is added it is taken to be accumulating except if it is the first added, in which case it overwrites! Actually this gets worse, as we could have been told to append this metadata to a document which already inherits metadata. Thus we need a new argument to determine whether this add was triggered by an append or a replace. */ 84 public void addMetadata(String filename, Metadata metadata, boolean force_accumulate) { 84 85 Gatherer.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory.")); 85 86 try { 86 87 88 89 90 91 92 87 // Retrieve the document element. 88 Element directorymetadata_element = base_document.getDocumentElement(); 89 // Iterate through the filesets looking for one that matches the given filename. 90 Element fileset_element = null; 91 boolean found = false; 92 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 93 for(int i = 0; !found && i < fileset_elements.getLength(); i++) { 93 94 fileset_element = (Element) fileset_elements.item(i); 94 95 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT); 95 96 for(int j = 0; !found && j < filename_elements.getLength(); j++) { 96 97 98 99 97 Element filename_element = (Element) filename_elements.item(j); 98 String filename_pattern = MSMUtils.getValue(filename_element); 99 // Have we found a match. If so break out of for loop. 100 if(filename != null && filename.matches(filename_pattern) && !filename_pattern.equals(DIRECTORY_FILENAME)) { 100 101 ///ystem.err.println("Adding to existing file fileset!"); 101 102 found = true; 102 103 103 } 104 else if(filename == null && filename_pattern.equals(DIRECTORY_FILENAME)) { 104 105 ///ystem.err.println("Adding to existing folder fileset!"); 105 106 ///ystem.err.println("filename_pattern = '" + filename_pattern + "'"); 106 107 found = true; 107 108 109 108 } 109 // No match. On to the next one. 110 else { 110 111 fileset_element = null; 111 112 113 114 } 115 116 117 118 112 } 113 filename_pattern = null; 114 filename_element = null; 115 } 116 } 117 fileset_elements = null; 118 // If we still haven't found an existing fileset, then its time to create one. 119 if(fileset_element == null) { 119 120 ///ystem.err.println("Creating a new fileset."); 120 121 fileset_element = base_document.createElement(FILESET_ELEMENT); … … 126 127 // If the filename is null then we add a directory metadata set as directorymetadata_element's first child 127 128 if(filename == null) { 128 129 129 filename_text = base_document.createTextNode(DIRECTORY_FILENAME); 130 if(directorymetadata_element.hasChildNodes()) { 130 131 directorymetadata_element.insertBefore(fileset_element, directorymetadata_element.getFirstChild()); 131 132 132 } 133 else { 133 134 directorymetadata_element.appendChild(fileset_element); 134 135 } 135 136 } 136 137 // Otherwise we just append the new fileset to directorymetadata_element's children. 137 138 else { 138 139 139 filename_text = base_document.createTextNode(filename); 140 directorymetadata_element.appendChild(fileset_element); 140 141 } 141 142 filename_element.appendChild(filename_text); … … 143 144 description_element = null; 144 145 filename_element = null; 145 } 146 // Now, finally, we can add the metadata. 147 Element metadata_element = base_document.createElement(StaticStrings.METADATA_ELEMENT); 148 String name = metadata.getElement().getName(); 149 if(name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) { 150 name = name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1); 151 } 152 metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name); 153 154 // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset. 155 boolean will_accumulate = false; 156 NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 157 for(int k = 0; !will_accumulate && k < sibling_description_elements.getLength(); k++) { 146 } 147 // Now, finally, we can add the metadata. 148 Element metadata_element = null; 149 String name = metadata.getElement().getName(); 150 // If this is extracted metadata, we use a special element name that won't be recognized by greenstone 151 if(name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) { 152 metadata_element = base_document.createElement(ALL_METADATA_TYPES[1]); 153 name = name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1); 154 } 155 else { 156 metadata_element = base_document.createElement(ALL_METADATA_TYPES[0]); 157 } 158 metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name); 159 160 // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset. 161 boolean will_accumulate = false; 162 NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 163 for(int k = 0; !will_accumulate && k < sibling_description_elements.getLength(); k++) { 158 164 Element sibling_description_element = (Element) sibling_description_elements.item(k); 159 NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT); 160 for(int l = 0; !will_accumulate && l < sibling_metadata_elements.getLength(); l++) { 161 Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l); 162 // It appears that its possible that we can be asked to add the same metadata twice (especially after a copy action is cancelled then repeated). So we check if we have been asked to add exactly the same value twice. 163 if(sibling_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE))) { 164 // Check the values and return if they are the same. 165 if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sibling_metadata_element))) { 166 return; 167 } 168 will_accumulate = true; 169 } 170 sibling_metadata_element = null; 171 } 172 sibling_metadata_elements = null; 165 // We have to do this for each type of metadata 166 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 167 NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 168 for(int l = 0; !will_accumulate && l < sibling_metadata_elements.getLength(); l++) { 169 Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l); 170 // It appears that its possible that we can be asked to add the same metadata twice (especially after a copy action is cancelled then repeated). So we check if we have been asked to add exactly the same value twice. 171 if(sibling_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE))) { 172 // Check the values and return if they are the same. 173 if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sibling_metadata_element))) { 174 return; 175 } 176 will_accumulate = true; 177 } 178 sibling_metadata_element = null; 179 } 180 sibling_metadata_elements = null; 181 } 173 182 sibling_description_element = null; 174 175 176 183 } 184 sibling_description_elements = null; 185 if(will_accumulate || force_accumulate) { //mode.equals(ACCUMULATE)) { 177 186 metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE); 178 187 } 179 188 // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string 180 189 GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement()); … … 186 195 node_value = metadata.getAbsoluteValue(); 187 196 } 188 189 190 191 192 193 194 195 197 ///ystem.err.println("Creating node in GDMDocument: '" + node_value + "'"); 198 metadata_element.appendChild(base_document.createTextNode(node_value)); 199 // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe). 200 NodeList description_elements = fileset_element.getElementsByTagName("Description"); 201 Element description_element = (Element) description_elements.item(0); 202 description_element.appendChild(metadata_element); 203 description_element = null; 204 metadata_element = null; 196 205 //mode = null; 197 198 199 206 fileset_element = null; 207 directorymetadata_element = null; 208 up_to_date = false; 200 209 } 201 210 catch (Exception error) { 202 203 } 204 205 206 211 Gatherer.printStackTrace(error); 212 } 213 } 214 215 public int countMetadata() { 207 216 int count = 0; 208 217 try { 209 210 211 212 213 218 // Retrieve the document element. 219 Element directorymetadata_element = base_document.getDocumentElement(); 220 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching. 221 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 222 for(int i = 0; i < fileset_elements.getLength(); i++) { 214 223 Element fileset_element = (Element) fileset_elements.item(i); 215 224 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 216 225 for(int k = 0; k < description_elements.getLength(); k++) { 217 Element description_element = (Element) description_elements.item(k); 218 NodeList metadata_elements = description_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT); 219 count = count + metadata_elements.getLength(); 220 metadata_elements = null; 221 description_element = null; 226 Element description_element = (Element) description_elements.item(k); 227 // We have to do this for each type of metadata 228 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 229 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 230 count = count + metadata_elements.getLength(); 231 metadata_elements = null; 232 } 233 description_element = null; 222 234 } 223 235 description_elements = null; 224 236 fileset_element = null; 225 226 227 237 } 238 fileset_elements = null; 239 directorymetadata_element = null; 228 240 } 229 241 catch (Exception error) { 230 242 Gatherer.printStackTrace(error); 231 243 } 232 244 return count; 233 234 235 236 245 } 246 247 /** Retrieve the document this class is wrapping. */ 248 public Document getDocument() { 237 249 return base_document; 238 239 240 250 } 251 /** Get all of the metadata, including directory level, associated with this file. */ 252 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) { 241 253 return getMetadata(filename, remove, metadatum_so_far, file, append_folder_level, false); 242 243 244 254 } 255 /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. Finally if purge is set retrieve every piece of metadata in this file. */ 256 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level, boolean purge) { 245 257 Gatherer.println("Get metadata for " + filename); 246 258 ArrayList metadatum = null; 247 259 if(metadatum_so_far == null) { 248 260 metadatum = new ArrayList(); 249 261 } 250 262 else { 251 263 metadatum = metadatum_so_far; 252 264 } 253 265 try { 254 255 256 257 258 266 // Retrieve the document element. 267 Element directorymetadata_element = base_document.getDocumentElement(); 268 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching. 269 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 270 for(int i = 0; i < fileset_elements.getLength(); i++) { 259 271 Element fileset_element = (Element) fileset_elements.item(i); 260 272 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT); 261 273 for(int j = 0; j < filename_elements.getLength(); j++) { 262 263 264 274 Element filename_element = (Element) filename_elements.item(j); 275 String filename_text = MSMUtils.getValue(filename_element); 276 if((filename != null && (filename.matches(filename_text) || (append_folder_level && filename.indexOf(File.separator) != -1 && filename_text.equals(filename.substring(0, filename.indexOf(File.separator)))))) || ((filename == null || append_folder_level) && filename_text.equals(DIRECTORY_FILENAME)) || purge) { 265 277 // If they match add all of the metadata found in the Description child element, remembering to abide by desired mode (accumulate vs. overwrite). 278 // Normal metadata 266 279 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 267 280 for(int k = 0; k < description_elements.getLength(); k++) { 268 Element description_element = (Element) description_elements.item(k); 269 NodeList metadata_elements = description_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT); 270 for(int l = 0; l < metadata_elements.getLength(); l++) { 271 Element metadata_element = (Element) metadata_elements.item(l); 272 String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE); 273 //String language = metadata_element.getAttribute("language"); 274 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE); 275 String raw_value = MSMUtils.getValue(metadata_element); 276 // Raw value is in GREENSTONE form, convert to DOM 277 raw_value = Codec.transform(raw_value, Codec.GREENSTONE_TO_DOM); 278 // ***** LEGACY SUPPORT ***** 279 // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\' 280 if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) { 281 Gatherer.println("Detected Legacy Path: " + raw_value); 282 raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR); 283 Gatherer.println("Updated Path To: " + raw_value); 284 MSMUtils.setValue(metadata_element, raw_value); 281 Element description_element = (Element) description_elements.item(k); 282 // We have to do this for each type of metadata 283 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 284 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 285 for(int l = 0; l < metadata_elements.getLength(); l++) { 286 Element metadata_element = (Element) metadata_elements.item(l); 287 String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE); 288 //String language = metadata_element.getAttribute("language"); 289 String mode = metadata_element.getAttribute(MODE_ATTRIBUTE); 290 String raw_value = MSMUtils.getValue(metadata_element); 291 // Raw value is in GREENSTONE form, convert to DOM 292 raw_value = Codec.transform(raw_value, Codec.GREENSTONE_TO_DOM); 293 // ***** LEGACY SUPPORT ***** 294 // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\' 295 if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) { 296 Gatherer.println("Detected Legacy Path: " + raw_value); 297 raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR); 298 Gatherer.println("Updated Path To: " + raw_value); 299 MSMUtils.setValue(metadata_element, raw_value); 300 } 301 // ************************** 302 // Using the element string and value, retrieve a matching Metadata object from the cache 303 Metadata metadata = null; 304 // If this element has hierarchy values then we must ensure the raw value is a full path, not an index. 305 // Try to retrieve an already comstructed piece of metadata from file - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable 306 if(GDMManager.metadata_cache.contains(raw_element, raw_value) && !purge) { 307 ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n"); 308 metadata = (Metadata) GDMManager.metadata_cache.get(raw_element, raw_value); 309 } 310 else { 311 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element); 312 if (element != null) { 313 GValueNode value = Metadata.getDefaultValueNode(element, raw_value); 314 ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n"); 315 metadata = new Metadata(element, value); 316 if(!purge) { 317 GDMManager.metadata_cache.put(raw_element, raw_value, metadata); 318 } 319 ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n"); 320 value = null; 321 element = null; 322 } 323 } 324 // check whether the metadata is null 325 if (metadata != null) { 326 // We determine whether this metadata is file or folder level 327 if(filename != null) { 328 ///ystem.err.println("Filename = " + filename); 329 ///ystem.err.println("filename_text = " + filename_text); 330 // If can only be file level if there is no folder path details in filename and if the filename matched the filename text node (it may have matched .* instead)! 331 if(filename.indexOf(File.separator) == -1 && filename.equals(filename_text)) { 332 metadata.setFileLevel(true); 333 ///ystem.err.println("File level!!!"); 334 } 335 else { 336 metadata.setFileLevel(false); 337 ///ystem.err.println("Inherited!!!"); 338 } 339 } 340 else { 341 ///ystem.err.println("Filename is null therefore this is file level metadata."); 342 metadata.setFileLevel(true); 343 } 344 metadata.setFile(file); 345 // If mode is overwrite, then remove any previous values for this metadata element. 346 if(mode.equals("accumulate")) { 347 metadata.setAccumulate(true); 348 } 349 else { 350 metadata.setAccumulate(false); 351 ///ystem.err.println("Metadata overwrites: " + metadata); 352 for(int m = metadatum.size() - 1; m >= 0; m--) { 353 Metadata old_metadata = (Metadata) metadatum.get(m); 354 if(old_metadata.getElement().equals(metadata.getElement())) { 355 metadatum.remove(m); 356 ///ystem.err.println("Removing overridden metadata: " + old_metadata); 357 } 358 old_metadata = null; 359 } 360 } 361 mode = null; 362 // Add the completed metadata and clean up 363 ///ystem.err.println("Adding metadata: " + metadata); 364 metadatum.add(metadata); 365 // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete). 366 // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level. 367 if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) { 368 ///ystem.err.println("Removing " + metadata + " from " + file); 369 description_element.removeChild(metadata_element); 370 // Remove the description element if empty. 371 if(!description_element.hasChildNodes()) { 372 fileset_element.removeChild(description_element); 373 } 374 } 375 else { 376 //String current_value = metadata.getAbsoluteValue(); 377 String current_value = Codec.transform(metadata.getValueNode().getFullPath(false), Codec.TEXT_TO_DOM); 378 ///ystem.err.println("Checking the current hfile: " + current_value); 379 ///ystem.err.println("Against whats in the hfile: " + current_value); 380 if(!raw_value.equals(current_value)) { 381 // Remove old text 382 while(metadata_element.hasChildNodes()) { 383 metadata_element.removeChild(metadata_element.getFirstChild()); 384 } 385 // Add new. 386 metadata_element.appendChild(base_document.createTextNode(current_value)); 387 } 388 } 389 } 390 metadata = null; 391 raw_value = null; 392 raw_element = null; 393 metadata_element = null; 394 } 395 metadata_elements = null; 285 396 } 286 // ************************** 287 // Using the element string and value, retrieve a matching Metadata object from the cache 288 Metadata metadata = null; 289 // If this element has hierarchy values then we must ensure the raw value is a full path, not an index. 290 // Try to retrieve an already comstructed piece of metadata from file - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable 291 if(GDMManager.metadata_cache.contains(raw_element, raw_value) && !purge) { 292 ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n"); 293 metadata = (Metadata) GDMManager.metadata_cache.get(raw_element, raw_value); 294 } 295 else { 296 ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element); 297 if (element != null) { 298 299 GValueNode value = Metadata.getDefaultValueNode(element, raw_value); 300 ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n"); 301 metadata = new Metadata(element, value); 302 if(!purge) { 303 GDMManager.metadata_cache.put(raw_element, raw_value, metadata); 304 } 305 ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n"); 306 value = null; 307 element = null; 308 } 309 } 310 // check whether the metadata is null 311 if (metadata != null) { 312 // We determine whether this metadata is file or folder level 313 if(filename != null) { 314 ///ystem.err.println("Filename = " + filename); 315 ///ystem.err.println("filename_text = " + filename_text); 316 // If can only be file level if there is no folder path details in filename and if the filename matched the filename text node (it may have matched .* instead)! 317 if(filename.indexOf(File.separator) == -1 && filename.equals(filename_text)) { 318 metadata.setFileLevel(true); 319 ///ystem.err.println("File level!!!"); 320 } 321 else { 322 metadata.setFileLevel(false); 323 ///ystem.err.println("Inherited!!!"); 324 } 325 } 326 else { 327 ///ystem.err.println("Filename is null therefore this is file level metadata."); 328 metadata.setFileLevel(true); 329 } 330 metadata.setFile(file); 331 332 // If mode is overwrite, then remove any previous values for this metadata element. 333 if(mode.equals("accumulate")) { 334 metadata.setAccumulate(true); 335 } 336 else { 337 metadata.setAccumulate(false); 338 ///ystem.err.println("Metadata overwrites: " + metadata); 339 for(int m = metadatum.size() - 1; m >= 0; m--) { 340 Metadata old_metadata = (Metadata) metadatum.get(m); 341 if(old_metadata.getElement().equals(metadata.getElement())) { 342 metadatum.remove(m); 343 ///ystem.err.println("Removing overridden metadata: " + old_metadata); 344 } 345 old_metadata = null; 346 } 347 } 348 mode = null; 349 350 // Add the completed metadata and clean up 351 ///ystem.err.println("Adding metadata: " + metadata); 352 metadatum.add(metadata); 353 354 // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete). 355 // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level. 356 if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) { 357 ///ystem.err.println("Removing " + metadata + " from " + file); 358 description_element.removeChild(metadata_element); 359 // Remove the description element if empty. 360 if(!description_element.hasChildNodes()) { 361 fileset_element.removeChild(description_element); 362 } 363 } 364 else { 365 //String current_value = metadata.getAbsoluteValue(); 366 String current_value = Codec.transform(metadata.getValueNode().getFullPath(false), Codec.TEXT_TO_DOM); 367 ///ystem.err.println("Checking the current hfile: " + current_value); 368 ///ystem.err.println("Against whats in the hfile: " + current_value); 369 if(!raw_value.equals(current_value)) { 370 // Remove old text 371 while(metadata_element.hasChildNodes()) { 372 metadata_element.removeChild(metadata_element.getFirstChild()); 373 } 374 // Add new. 375 metadata_element.appendChild(base_document.createTextNode(current_value)); 376 } 377 } 378 } 379 metadata = null; 380 raw_value = null; 381 raw_element = null; 382 metadata_element = null; 383 } 384 metadata_elements = null; 385 description_element = null; 397 description_element = null; 386 398 } 387 399 description_elements = null; 388 389 390 400 } 401 filename_text = null; 402 filename_element = null; 391 403 } 392 404 // If the file set no longer has any description entries, remove it entirely 393 405 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 394 406 if(description_elements.getLength() == 0) { 395 407 directorymetadata_element.removeChild(fileset_element); 396 408 } 397 409 description_elements = null; 398 410 filename_elements = null; 399 411 fileset_element = null; 400 401 402 412 } 413 fileset_elements = null; 414 directorymetadata_element = null; 403 415 } 404 416 catch (Exception error) { 405 417 Gatherer.self.printStackTrace(error); 406 418 } 407 419 ///ystem.err.println("Found " + metadatum.size() + " pieces of metadata."); 408 420 return metadatum; 409 410 411 412 421 } 422 423 /** Determine if this document has been saved recently, and thus xml file version is up to date. */ 424 public boolean isUpToDate() { 413 425 return false; 414 415 416 417 426 } 427 428 /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */ 429 public boolean isValid() { 418 430 // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata. 419 431 String doctype_name = base_document.getDoctype().getName(); 420 432 String root_name = base_document.getDocumentElement().getTagName(); 421 433 return ((doctype_name.equals("GreenstoneDirectoryMetadata") && root_name.equals("GreenstoneDirectoryMetadata")) || (doctype_name.equals("DirectoryMetadata") && root_name.equals("DirectoryMetadata"))); 422 } 423 /** Remove the given directory level metadata from this document. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */ 424 public void removeMetadata(String filename, Metadata metadata) { 434 } 435 /** Remove the given directory level metadata from this document. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */ 436 public void removeMetadata(String filename, Metadata metadata) { 437 Gatherer.println("Remove metadata: " + metadata + "\nFrom filename: " + filename); 425 438 try { 426 427 428 429 430 431 432 433 434 439 boolean found = false; 440 boolean first_metadata_element_found = true; 441 boolean make_next_metadata_element_overwrite = false; 442 boolean remove_fileset = false; 443 // Retrieve the document element. 444 Element directorymetadata_element = base_document.getDocumentElement(); 445 // Iterate through the filesets looking for the directory level one. 446 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 447 for(int i = 0; !found && i < fileset_elements.getLength(); i++) { 435 448 Element fileset_element = (Element) fileset_elements.item(i); 436 449 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT); 437 450 for(int j = 0; !found && j < filename_elements.getLength(); j++) { 438 439 440 451 Element filename_element = (Element) filename_elements.item(j); 452 String filename_text = MSMUtils.getValue(filename_element); 453 if((filename != null && filename.matches(filename_text) && !filename.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) { 441 454 // Retrieve the Metadata Element for this fileset, and iterate through them looking for the one which we are to remove. 442 455 NodeList description_elements = fileset_element.getElementsByTagName("Description"); 443 456 for(int k = 0; !found && k < description_elements.getLength(); k++) { 444 Element description_element = (Element) description_elements.item(k); 445 NodeList metadata_elements = description_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT); 446 for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) { 447 Element metadata_element = (Element) metadata_elements.item(l); 448 String element = metadata_element.getAttribute("name"); 449 String value = MSMUtils.getValue(metadata_element); 450 // See if this is the metadata we wish to remove 451 if(element.equals(metadata.getElement().getName())) { 452 if(value.equals(metadata.getAbsoluteValue())) { 453 // Remove it 454 ///ystem.err.println("Remove " + element + "-" + value); 455 description_element.removeChild(metadata_element); 456 found = true; 457 // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now. 458 if(first_metadata_element_found && !metadata.accumulates()) { 459 ///ystem.err.println("First of this element found!"); 460 make_next_metadata_element_overwrite = true; 461 } 462 } 463 // If this was the first metadata we've found with the element of the one to be removed set first found to false. 464 else if(first_metadata_element_found) { 465 ///ystem.err.println("Found a matching element: " + element + "=" + value); 466 first_metadata_element_found = false; 467 } 468 // Otherwise we should make this metadata overwrite as requested. 469 else if(make_next_metadata_element_overwrite) { 470 ///ystem.err.println("Changing to overwrite: " + element + "=" + value); 471 metadata_element.setAttribute(MODE_ATTRIBUTE, ""); 472 } 457 Element description_element = (Element) description_elements.item(k); 458 // We have to do this for each type of metadata 459 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 460 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 461 for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) { 462 Element metadata_element = (Element) metadata_elements.item(l); 463 String element = metadata_element.getAttribute("name"); 464 String value = MSMUtils.getValue(metadata_element); 465 // See if this is the metadata we wish to remove 466 if(element.equals(metadata.getElement().getName())) { 467 if(value.equals(metadata.getAbsoluteValue())) { 468 // Remove it 469 ///ystem.err.println("Remove " + element + "-" + value); 470 description_element.removeChild(metadata_element); 471 found = true; 472 // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now. 473 if(first_metadata_element_found && !metadata.accumulates()) { 474 ///ystem.err.println("First of this element found!"); 475 make_next_metadata_element_overwrite = true; 476 } 477 } 478 // If this was the first metadata we've found with the element of the one to be removed set first found to false. 479 else if(first_metadata_element_found) { 480 ///ystem.err.println("Found a matching element: " + element + "=" + value); 481 first_metadata_element_found = false; 482 } 483 // Otherwise we should make this metadata overwrite as requested. 484 else if(make_next_metadata_element_overwrite) { 485 ///ystem.err.println("Changing to overwrite: " + element + "=" + value); 486 metadata_element.setAttribute(MODE_ATTRIBUTE, ""); 487 } 488 } 489 value = null; 490 element = null; 491 metadata_element = null; 492 } 493 metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 494 // If we found it, removed it, and now the description tag has no children, mark the fileset for removal 495 if(metadata_elements.getLength() == 0) { 496 remove_fileset = true; 497 } 498 metadata_elements = null; 473 499 } 474 value = null; 475 element = null; 476 metadata_element = null; 477 } 478 metadata_elements = description_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT); 479 // If we found it, removed it, and now the description tag has no children, mark the fileset for removal 480 if(metadata_elements.getLength() == 0) { 481 remove_fileset = true; 482 } 483 metadata_elements = null; 484 description_element = null; 500 description_element = null; 485 501 } 486 502 description_elements = null; 487 488 489 503 } 504 filename_text = null; 505 filename_element = null; 490 506 } 491 507 filename_elements = null; 492 508 if(found && remove_fileset) { 493 509 directorymetadata_element.removeChild(fileset_element); 494 510 } 495 511 fileset_element = null; 496 497 498 499 512 } 513 fileset_elements = null; 514 directorymetadata_element = null; 515 up_to_date = false; 500 516 } 501 517 catch (Exception error) { 502 503 } 504 505 506 507 518 Gatherer.printStackTrace(error); 519 } 520 } 521 522 /** Change the up to date flag. */ 523 public void setUpToDate(boolean up_to_date) { 508 524 this.up_to_date = up_to_date; 509 525 } 510 526 } -
trunk/gli/src/org/greenstone/gatherer/msm/GreenstoneArchiveParser.java
r6047 r6051 139 139 for(int k = 0; !found && k < metadatum.size(); k++) { 140 140 Metadata sibling = (Metadata) metadatum.get(k); 141 ///ystem.err.println("Comparing " + element + " to " + sibling.getElement()); 141 142 if(element.equals(sibling.getElement())) { 143 ///ystem.err.println("Removing metadata for: " + sibling); 142 144 Gatherer.c_man.getCollection().gdm.metadataChanged(new MSMEvent(this, System.currentTimeMillis(), target_file, sibling, null)); 143 145 }
Note:
See TracChangeset
for help on using the changeset viewer.