- Timestamp:
- 2003-06-11T13:49:35+12:00 (21 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gli/src/org/greenstone/gatherer/msm/GreenstoneArchiveParser.java
r4369  r4563
   58     58    private GShell shell;
   59     59
   60         - static final String ignore_list[] = {"assocfilepath", "gsdl","Identifier","Source","URL"};
          60  + static final String ignore_list[] = {"assocfilepath", "gsdl", "Source", "Identifier","URL"};
   61     61
   62     62    public GreenstoneArchiveParser(GShellProgressMonitor progress, GShell shell) {
    …      …
   92     92        String file_path = null;
   93     93        Element archive_element = document.getDocumentElement();
   94     94        // Retrieve all of the Metadata sections.
   95     95        NodeList metadata_elements = archive_element.getElementsByTagName("Metadata");
   96     96        // Now for each Metadata entry retrieved...
   97     97        for(int i = 0; i < metadata_elements.getLength(); i++) {
   98     98            Element metadata_element = (Element) metadata_elements.item(i);
    …      …
  104    104            else {
  105    105                // Check if its name starts with, or is equal to, one of the values in our ignore list, and if so ignore this metadata.
  106         -             boolean ignore = (name.indexOf(".") != -1);
         106  +             boolean ignore = false;
  107    107                for(int j = 0; !ignore && j < ignore_list.length; j++) {
  108    108                    ignore = name.startsWith(ignore_list[j]);
    …      …
  167    167        }
  168    168    }
         169  +
         170  + static final String metadata_ignore_list[] = {"assocfilepath", "gsdl", "Identifier","URL"};
         171  +
         172  + static public ArrayList extractMetadataElements(File archive_directory) {
         173  +     ArrayList extracted_metadata_elements = new ArrayList();
         174  +     File document_directories[] = archive_directory.listFiles();
         175  +     for(int i = 0; i < document_directories.length; i++) {
         176  +         // Find the doc.xml file within
         177  +         if(document_directories[i].isDirectory()) {
         178  +             File document_file = new File(document_directories[i], "doc.xml");
         179  +             // Then extract the metadata from it.
         180  +             if(document_file.exists()) {
         181  +                 try {
         182  +                     Document document = Utility.parse(document_file, false);
         183  +                     // Retrieve all of the Metadata sections.
         184  +                     Element archive_element = document.getDocumentElement();
         185  +                     NodeList metadata_elements = archive_element.getElementsByTagName("Metadata");
         186  +                     // Now for each Metadata entry retrieved...
         187  +                     for(int j = 0; j < metadata_elements.getLength(); j++) {
         188  +                         Element metadata_element = (Element) metadata_elements.item(j);
         189  +                         String name = metadata_element.getAttribute("name");
         190  +                         // Check if its name starts with, or is equal to, one of the values in our ignore list, and if so ignore this metadata.
         191  +                         boolean ignore = false;
         192  +                         for(int k = 0; !ignore && k < metadata_ignore_list.length; k++) {
         193  +                             ignore = name.startsWith(metadata_ignore_list[k]);
         194  +                         }
         195  +                         if(!ignore && !extracted_metadata_elements.contains(name)) {
         196  +                             extracted_metadata_elements.add(name);
         197  +                         }
         198  +                         name = null;
         199  +                         metadata_element = null;
         200  +                     }
         201  +                     metadata_elements = null;
         202  +                     archive_element = null;
         203  +                     document = null;
         204  +                 }
         205  +                 catch (Exception error) {
         206  +                     Gatherer.printStackTrace(error);
         207  +                 }
         208  +             }
         209  +             document_file = null;
         210  +         }
         211  +     }
         212  +     document_directories = null;
         213  +     return extracted_metadata_elements;
         214  + }
  169    215  }
Note: See TracChangeset for help on using the changeset viewer.