Changeset 6827
- Timestamp:
- 2004-02-18T13:11:14+13:00 (20 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gli/src/org/greenstone/gatherer/msm/GDMDocument.java
r6539 r6827 45 45 */ 46 46 public class GDMDocument { 47 48 49 50 51 52 53 54 55 56 57 58 59 60 47 /** Record if the document this object is based on is up to date. */ 48 private boolean up_to_date = true; 49 /** The document this class sources its data from. */ 50 private Document base_document; 51 static final private String ACCUMULATE = "accumulate"; 52 /** The pattern to match when searching for directory level assignments. */ 53 static final private String DIRECTORY_FILENAME = ".*"; 54 static final private String DESCRIPTION_ELEMENT = "Description"; 55 static final private String FILENAME_ELEMENT = "FileName"; 56 static final private String FILESET_ELEMENT = "FileSet"; 57 static final private String HVALUE_ATTRIBUTE = "hvalue"; 58 static final private String MODE_ATTRIBUTE = "mode"; 59 static final private String OVERWRITE = "overwrite"; 60 static final private String[] ALL_METADATA_TYPES = {StaticStrings.METADATA_ELEMENT, StaticStrings.EXTRACTED_METADATA_ELEMENT}; 61 61 62 62 /** Constructor which creates a brand new metadata.xml document. */ 63 63 public GDMDocument() { 64 64 // Create new document. We do this by loading a copy of the template. */ 65 65 this.base_document = Utility.parse(Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE, true); 66 67 68 69 66 } 67 68 /** Constructor which parses an existing metadata.xml document. */ 69 public GDMDocument(File file) { 70 70 try { 71 71 this.base_document = Utility.parse(file.getAbsolutePath(), false); 72 72 } 73 73 catch (Exception error) { 74 75 } 76 77 78 79 74 // Poorly formed, or completely invalid metadata.xml file! 75 } 76 } 77 78 /** Constructor which wraps around an existing metadata.xml document. */ 79 public GDMDocument(Document base_document) { 80 80 this.base_document = base_document; 81 81 } 82 82 83 83 /** Add this metadata to the named file. There is one tricky thing to consider. Whenever a metadata entry is added it is taken to be accumulating except if it is the first added, in which case it overwrites! Actually this gets worse, as we could have been told to append this metadata to a document which already inherits metadata. Thus we need a new argument to determine whether this add was triggered by an append or a replace. */ 84 public void addMetadata(String filename, Metadata metadata, boolean force_accumulate) { 85 ///atherer.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory.")); 84 public void addMetadata(String filename, Metadata metadata, boolean force_accumulate) 85 { 86 if (filename != null) { 87 filename = Codec.transform(filename, Codec.TEXT_TO_REGEXP); 88 } 89 /// System.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory.")); 86 90 try { 87 91 // Retrieve the document element. … … 91 95 boolean found = false; 92 96 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 93 97 for(int i = 0; !found && i < fileset_elements.getLength(); i++) { 94 98 fileset_element = (Element) fileset_elements.item(i); 95 99 NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT); 96 100 for(int j = 0; !found && j < filename_elements.getLength(); j++) { 97 Element filename_element = (Element) filename_elements.item(j); 98 String filename_pattern = MSMUtils.getValue(filename_element); 99 // Have we found a match. If so break out of for loop. 100 if(filename != null && filename.matches(filename_pattern) && !filename_pattern.equals(DIRECTORY_FILENAME)) { 101 Element filename_element = (Element) filename_elements.item(j); 102 String filename_pattern = MSMUtils.getValue(filename_element); 103 filename_pattern = Codec.transform(filename_pattern, Codec.TEXT_TO_REGEXP); 104 // System.err.println("Checking " + filename + " against " + filename_pattern + "|"); 105 // Have we found a match. If so break out of for loop. 106 if(filename != null && filename.matches(filename_pattern) && !filename_pattern.equals(DIRECTORY_FILENAME)) { 101 107 ///ystem.err.println("Adding to existing file fileset!"); 102 108 found = true; 103 104 109 } 110 else if(filename == null && filename_pattern.equals(DIRECTORY_FILENAME)) { 105 111 ///ystem.err.println("Adding to existing folder fileset!"); 106 112 ///ystem.err.println("filename_pattern = '" + filename_pattern + "'"); 107 113 found = true; 108 109 110 114 } 115 // No match. On to the next one. 116 else { 111 117 fileset_element = null; 112 113 114 115 } 116 117 118 119 118 } 119 filename_pattern = null; 120 filename_element = null; 121 } 122 } 123 fileset_elements = null; 124 // If we still haven't found an existing fileset, then its time to create one. 125 if(fileset_element == null) { 120 126 ///ystem.err.println("Creating a new fileset."); 121 127 fileset_element = base_document.createElement(FILESET_ELEMENT); … … 127 133 // If the filename is null then we add a directory metadata set as directorymetadata_element's first child 128 134 if(filename == null) { 129 130 135 filename_text = base_document.createTextNode(DIRECTORY_FILENAME); 136 if(directorymetadata_element.hasChildNodes()) { 131 137 directorymetadata_element.insertBefore(fileset_element, directorymetadata_element.getFirstChild()); 132 133 138 } 139 else { 134 140 directorymetadata_element.appendChild(fileset_element); 135 141 } 136 142 } 137 143 // Otherwise we just append the new fileset to directorymetadata_element's children. 138 144 else { 139 140 145 filename_text = base_document.createTextNode(filename); 146 directorymetadata_element.appendChild(fileset_element); 141 147 } 142 148 filename_element.appendChild(filename_text); … … 144 150 description_element = null; 145 151 filename_element = null; 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 152 } 153 // Now, finally, we can add the metadata. 154 Element metadata_element = null; 155 String name = metadata.getElement().getName(); 156 // If this is extracted metadata, we use a special element name that won't be recognized by greenstone 157 if(name.startsWith(Utility.EXTRACTED_METADATA_NAMESPACE)) { 158 metadata_element = base_document.createElement(ALL_METADATA_TYPES[1]); 159 name = name.substring(Utility.EXTRACTED_METADATA_NAMESPACE.length() + 1); 160 } 161 else { 162 metadata_element = base_document.createElement(ALL_METADATA_TYPES[0]); 163 } 164 metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name); 165 166 // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset. 167 boolean will_accumulate = false; 168 NodeList sibling_description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 169 for(int k = 0; !will_accumulate && k < sibling_description_elements.getLength(); k++) { 164 170 Element sibling_description_element = (Element) sibling_description_elements.item(k); 165 171 // We have to do this for each type of metadata 166 172 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 167 168 169 173 NodeList sibling_metadata_elements = sibling_description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 174 for(int l = 0; !will_accumulate && l < sibling_metadata_elements.getLength(); l++) { 175 Element sibling_metadata_element = (Element) sibling_metadata_elements.item(l); 170 176 // It appears that its possible that we can be asked to add the same metadata twice (especially after a copy action is cancelled then repeated). So we check if we have been asked to add exactly the same value twice. 171 if(sibling_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE))) { 172 // Check the values and return if they are the same. 173 if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sibling_metadata_element))) { 174 return; 175 } 176 will_accumulate = true; 177 } 178 sibling_metadata_element = null; 177 if(sibling_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE).equals(metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE))) { 178 // Check the values and return if they are the same. 179 if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sibling_metadata_element))) { 180 return; 181 } 182 will_accumulate = true; 179 183 } 180 sibling_metadata_elements = null; 184 sibling_metadata_element = null; 185 } 186 sibling_metadata_elements = null; 181 187 } 182 188 sibling_description_element = null; 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 189 } 190 sibling_description_elements = null; 191 if(will_accumulate || force_accumulate) { //mode.equals(ACCUMULATE)) { 192 metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE); 193 } 194 // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string 195 GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement()); 196 String node_value = null; 197 if(model != null && model.isHierarchy()) { 198 //node_value = /odec.transform(metadata.getValueNode().getFullPath(false), /odec.TEXT_TO_DOM); 199 node_value = metadata.getValueNode().getFullPath(false); 200 } 201 else { 202 node_value = metadata.getAbsoluteValue(); 203 } 204 ///ystem.err.println("Creating node in GDMDocument: '" + node_value + "'"); 205 metadata_element.appendChild(base_document.createTextNode(node_value)); 206 // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe). 207 NodeList description_elements = fileset_element.getElementsByTagName("Description"); 208 Element description_element = (Element) description_elements.item(0); 209 description_element.appendChild(metadata_element); 210 description_element = null; 211 metadata_element = null; 206 212 //mode = null; 207 208 209 213 fileset_element = null; 214 directorymetadata_element = null; 215 up_to_date = false; 210 216 } 211 217 catch (Exception error) { 212 213 } 214 215 216 218 Gatherer.printStackTrace(error); 219 } 220 } 221 222 public int countMetadata() { 217 223 int count = 0; 218 224 try { 219 220 221 222 223 225 // Retrieve the document element. 226 Element directorymetadata_element = base_document.getDocumentElement(); 227 // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching. 228 NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT); 229 for(int i = 0; i < fileset_elements.getLength(); i++) { 224 230 Element fileset_element = (Element) fileset_elements.item(i); 225 231 NodeList description_elements = fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT); 226 232 for(int k = 0; k < description_elements.getLength(); k++) { 227 228 229 230 231 232 233 234 233 Element description_element = (Element) description_elements.item(k); 234 // We have to do this for each type of metadata 235 for(int z = 0; z < ALL_METADATA_TYPES.length; z++) { 236 NodeList metadata_elements = description_element.getElementsByTagName(ALL_METADATA_TYPES[z]); 237 count = count + metadata_elements.getLength(); 238 metadata_elements = null; 239 } 240 description_element = null; 235 241 } 236 242 description_elements = null; 237 243 fileset_element = null; 238 239 240 244 } 245 fileset_elements = null; 246 directorymetadata_element = null; 241 247 } 242 248 catch (Exception error) { 243 249 Gatherer.printStackTrace(error); 244 250 } 245 251 return count; 246 247 248 249 252 } 253 254 /** Retrieve the document this class is wrapping. */ 255 public Document getDocument() { 250 256 return base_document; 251 252 253 254 257 } 258 259 /** Get all of the metadata, including directory level, associated with this file. */ 260 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) { 255 261 return getMetadata(filename, remove, metadatum_so_far, file, append_folder_level, false); 256 262 } 257 263 /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. Finally if purge is set retrieve every piece of metadata in this file. */ 258 264 public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level, boolean purge) { … … 450 456 } 451 457 452 453 458 /** Determine if this document has been saved recently, and thus xml file version is up to date. */ 459 public boolean isUpToDate() { 454 460 return false; 455 456 457 458 461 } 462 463 /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */ 464 public boolean isValid() { 459 465 // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata. 460 466 String doctype_name = base_document.getDoctype().getName(); 461 467 String root_name = base_document.getDocumentElement().getTagName(); 462 468 return ((doctype_name.equals("GreenstoneDirectoryMetadata") && root_name.equals("GreenstoneDirectoryMetadata")) || (doctype_name.equals("DirectoryMetadata") && root_name.equals("DirectoryMetadata"))); 463 469 } 464 470 465 471 /** Remove all of the extracted metadata (XMetadata) from this document. */
Note:
See TracChangeset
for help on using the changeset viewer.