source: main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java@ 34264

Last change on this file since 34264 was 34264, checked in by ak19, 4 years ago
  1. Added moveMetaXMLToCSV 2. Both this and exportMetaAsCSV now also made to work for the remote case. 3. Bugfix to oversight in GUIManager that in the previous commit used to ignore user selected csvfile and always created a metadata.csv in import folder. 4. Tidied up MetaToCSV.java some more.
  • Property svn:keywords set to Author Date Id Revision
File size: 43.1 KB
Line 
1/**
2 *############################################################################
3 * A component of the Greenstone Librarian Interface, part of the Greenstone
4 * digital library suite from the New Zealand Digital Library Project at the
5 * University of Waikato, New Zealand.
6 *
7 * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ
8 *
9 * Copyright (C) 2004 New Zealand Digital Library Project
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *############################################################################
25 */
26
27package org.greenstone.gatherer.metadata;
28
29
30import java.io.*;
31import java.util.*;
32import org.greenstone.gatherer.DebugStream;
33import org.greenstone.gatherer.collection.CollectionTreeNode;
34import org.greenstone.gatherer.util.XMLTools;
35import org.w3c.dom.*;
36
37import org.greenstone.gatherer.util.Utility;
38
39/** This class represents one metadata.xml file */
40public class MetadataXMLFile
41 extends File
42{
43 static final private String DESCRIPTION_ELEMENT = "Description";
44 static final private String DIRECTORY_FILENAME = ".*";
45 static final private String FILENAME_ELEMENT = "FileName";
46 static final private String FILESET_ELEMENT = "FileSet";
47 static final private String METADATA_ELEMENT = "Metadata";
48 static final private String[] nonEscapingElements = new String[]{FILENAME_ELEMENT};
49
50 /** Special metadata field: the filename encoding is a unique sort of metadata in
51 * that it is not just information stored with a collection file, but also needs to
52 * be applied in real-time to the collection file (to its filename) for display. */
53 static final public String FILENAME_ENCODING_METADATA = "gs.filenameEncoding";
54
55 // To speed things up a bit we keep the last accessed metadata.xml file in memory
56 static private File loaded_file = null;
57 static private Document loaded_file_document = null;
58 static private boolean loaded_file_changed = false;
59
60
/**
 * Creates a handle on the metadata.xml file at the given path.
 * Nothing is read or parsed here; the path is simply passed on to the
 * superclass constructor.
 *
 * @param metadata_xml_file_path the path of the metadata.xml file
 */
public MetadataXMLFile(String metadata_xml_file_path)
{
    super(metadata_xml_file_path);
}
65
66 public void clearAllMetadataInFile() {
67 // If this metadata.xml file isn't the one currently loaded, load it now
68 if (loaded_file != this) {
69 // First we must save out the currently loaded file
70 saveLoadedFile();
71
72 // Parse the metadata.xml file
73 Document document = XMLTools.parseXMLFile(this);
74 if (document == null) {
75 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
76 return;
77 }
78
79 loaded_file = this;
80 loaded_file_document = document;
81
82 reEncodeFilenamesInMetadataXML(loaded_file_document);
83 }
84
85 Element root = loaded_file_document.getDocumentElement();
86 while(root.hasChildNodes()) {
87 root.removeChild(root.getFirstChild());
88 }
89 loaded_file_changed = true;
90 //saveLoadedFile(); // this final metaxml file being cleared of meta will get saved by MetaXMLFileManager.clearAllMetadataInCollection()
91 }
92
93
94 public void addMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
95 {
96 // If this metadata.xml file isn't the one currently loaded, load it now
97 if (loaded_file != this) {
98 // First we must save out the currently loaded file
99 saveLoadedFile();
100
101 // Parse the metadata.xml file
102 Document document = XMLTools.parseXMLFile(this);
103 if (document == null) {
104 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
105 return;
106 }
107
108 loaded_file = this;
109 loaded_file_document = document;
110 reEncodeFilenamesInMetadataXML(loaded_file_document);
111
112 }
113
114 // Determine the file's path relative to the location of the metadata.xml file
115 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
116
117 String file_relative_path = file_node.getURLEncodedFilePath();
118 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
119 file_relative_path = file_relative_path.replace("&", FilenameEncoding.HEX_ENTITY_AMPERSAND);
120 }
121 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
122
123 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
124 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
125 }
126
127 // Form a regular expression that specifies the scope of the metadata
128 String file_path_regexp;
129 if (file_relative_path.equals("")) {
130 // Special case for matching all files in the directory
131 file_path_regexp = DIRECTORY_FILENAME;
132 }
133 else {
134 // Convert the file path into a regular expression that will match it
135 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
136 }
137
138 // LEAVE THIS DEBUGGING STATEMENT IN - USEFUL TO DEBUG FILENAME ENCODING ISSUES WHEN META ASSIGNED
139 //System.err.println("MetadataXMLFile.addMetadata() Adding meta for file regexp: "
140 // + file_path_regexp + " - " + org.greenstone.gatherer.util.Utility.debugUnicodeString(file_path_regexp));
141
142 // Find the appropriate FileSet element for this file
143 Element appropriate_fileset_element = null;
144
145 // Read all the FileSet elements in the file
146 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
147 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
148 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
149
150 // Check the FileName elements of the FileSet to see if we have a match
151 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
152 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
153 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
154 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
155
156 // Only exact matches can be extended with new metadata
157 if (current_filename_element_value.equals(file_path_regexp)) {
158 appropriate_fileset_element = current_fileset_element;
159 break;
160 }
161 }
162 }
163
164 // If no appropriate FileSet element exists create a new one for this file
165 if (appropriate_fileset_element == null) {
166 DebugStream.println("Creating new FileSet element for file since none exists..."+file_path_regexp);
167 appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT);
168
169 Element new_filename_element = loaded_file_document.createElement(FILENAME_ELEMENT);
170 new_filename_element.appendChild(loaded_file_document.createTextNode(file_path_regexp));
171 appropriate_fileset_element.appendChild(new_filename_element);
172
173 Element new_description_element = loaded_file_document.createElement(DESCRIPTION_ELEMENT);
174 appropriate_fileset_element.appendChild(new_description_element);
175
176 // add the fileset element for .* at the top: especially important for
177 // non-accumulating (and override mode) meta. Other type fileset elements can be appended
178 if(file_path_regexp.equals(DIRECTORY_FILENAME)) {
179 loaded_file_document.getDocumentElement().insertBefore(appropriate_fileset_element,
180 loaded_file_document.getDocumentElement().getFirstChild());
181 } else {
182 loaded_file_document.getDocumentElement().appendChild(appropriate_fileset_element);
183 }
184 }
185
186 // Add each of the metadata values to the FileSet's Description element
187 Element description_element = (Element) appropriate_fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0);
188 for (int i = 0; i < metadata_values.size(); i++) {
189 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
190 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
191
192 // Remove any characters that are invalid in XML
193 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
194
195 // Square brackets need to be escaped because they are a special character in Greenstone
196 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
197 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
198
199 // the gs.filenameEncoding metadata is unique in that, when added, removed or
200 // changed, it must be applied on the file(name) whose metadata has been adjusted
201 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
202 metadata_value_string = processFilenameEncoding(file_path_regexp,
203 file_node, metadata_value_string, false);
204 // true only if removing meta
205 }
206
207 // Check if this piece of metadata has already been assigned to this FileSet element
208 boolean metadata_already_assigned = false;
209 NodeList metadata_elements_nodelist = description_element.getElementsByTagName(METADATA_ELEMENT);
210 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
211 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
212
213 // Check if the metadata element name matches
214 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
215 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
216 // if the metadata must not accumulate, then edit the current value
217 if (!metadata_value.isAccumulatingMetadata()) {
218 XMLTools.setNodeText(current_metadata_element, metadata_value_string);
219 metadata_already_assigned = true;
220 break;
221 }
222 // Check if the metadata element value matches
223 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
224 if (current_metadata_value_string.equals(metadata_value_string)) {
225 // Metadata already assigned
226 metadata_already_assigned = true;
227 break;
228 }
229 }
230 }
231
232 // If the piece of metadata hasn't already been assigned, add it now
233 if (!metadata_already_assigned) {
234 // Create a new Metadata element to record this metadata
235 Element new_metadata_element = loaded_file_document.createElement(METADATA_ELEMENT);
236 new_metadata_element.setAttribute("name", metadata_value.getMetadataElement().getFullName());
237 new_metadata_element.setAttribute("mode", (metadata_value.isAccumulatingMetadata() ? "accumulate" : "override"));
238 new_metadata_element.appendChild(loaded_file_document.createTextNode(metadata_value_string));
239
240 // Accumulating metadata: add at the end
241 if (metadata_value.isAccumulatingMetadata()) {
242 description_element.appendChild(new_metadata_element);
243 }
244 // Override metadata: add at the start (so it overrides inherited metadata without affecting other assigned metadata)
245 else {
246 description_element.insertBefore(new_metadata_element, description_element.getFirstChild());
247 }
248 }
249 }
250
251 // Remember that we've changed the file so it gets saved when a new one is loaded
252 loaded_file_changed = true;
253 }
254
255
256 // DO NOT REMOVE THE System.err DEBUGGING STATEMENTS FROM THIS METHOD: HELPS WITH TESTING/DEBUGGING
257 // WHEN FILE-LEVEL META IS ASSIGNED TO NON-ASCII ENCODED FILENAMES OR WITH FILENAMES CONTAINING +/ampersand
258 public ArrayList getMetadataAssignedToFile(File file, boolean fileEncodingOnly)
259 {
260 // If this metadata.xml file isn't the one currently loaded, load it now
261 if (loaded_file != this) {
262 // First we must save out the currently loaded file
263 saveLoadedFile();
264
265 // Parse the metadata.xml file
266 Document document = XMLTools.parseXMLFile(this);
267 if (document == null) {
268 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
269 return new ArrayList();
270 }
271
272 loaded_file = this;
273 loaded_file_document = document;
274
275 reEncodeFilenamesInMetadataXML(loaded_file_document);
276 }
277
278 // Determine the file's path relative to the location of the metadata.xml file
279 String file_relative_path = FilenameEncoding.fileToURLEncoding(file);
280 File metadata_xml_file_directory = getParentFile();
281 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(metadata_xml_file_directory);
282 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
283
284 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
285 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
286 }
287
288 // Build up a list of metadata assigned to this file
289 ArrayList metadata_values = new ArrayList();
290
291 // Read all the FileSet elements in the file
292 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
293 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
294 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
295 boolean current_fileset_matches = false;
296 boolean is_one_file_only_metadata = true;
297 File folder_metadata_inherited_from = null;
298
299 // Check the FileName elements of the FileSet to see if we have a match
300 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
301 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
302 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
303 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
304
305 //System.err.println("\n Original TAIL filename was: " + Utility.debugUnicodeString(file.getName()));
306 String hexdecoded_regexed_file_relative_path = FilenameEncoding.decodeStringContainingHexEntities(file_relative_path);
307 //System.err.println("Looking in meta.xml for hexdecoded_regexed_file_RELATIVE_path: " + hexdecoded_regexed_file_relative_path
308 //+ " - debug version: " + Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path));
309
310 // Does this fileset specify metadata for one file only?
311 is_one_file_only_metadata = true;
312 if (current_filename_element_value.indexOf("*") != -1 && !current_filename_element_value.equals(DIRECTORY_FILENAME)) {
313 // No, it specifies metadata for multiple files (but not all the files in the directory)
314 is_one_file_only_metadata = false;
315 }
316
317 String hexdecoded_current_filename_element_value = FilenameEncoding.decodeStringContainingHexEntities(current_filename_element_value);
318 //System.err.println(" Checking to see if it matches " + hexdecoded_current_filename_element_value + " - debug: " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
319 //System.err.println(" Checking to see if it matches " + current_filename_element_value + " - debug: " + Utility.debugUnicodeString(current_filename_element_value));
320
321 // This fileset specifies metadata for the file
322 // MetadataXMLFile.addMetadata(CollectionTreeNode, ArrayList) stored filename in uppercase hex
323 // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples
324 if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) {
325 //System.err.println(" @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path + "\n");
326 current_fileset_matches = true;
327 if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) {
328 folder_metadata_inherited_from = metadata_xml_file_directory;
329 }
330 break;
331 } //else {
332 //System.err.println( hexdecoded_regexed_file_relative_path + " does not match " + hexdecoded_current_filename_element_value);
333 //System.err.println( Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path) + " does not match " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
334 //}
335
336 // This fileset specifies metadata for the folder the file is in
337 if (hexdecoded_regexed_file_relative_path.startsWith(hexdecoded_current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) {
338 current_fileset_matches = true;
339 folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value);
340 break;
341 }
342 }
343
344 // The FileSet doesn't apply, so move onto the next one
345 if (current_fileset_matches == false) {
346 continue;
347 }
348
349 // Read all the Metadata elements in the fileset
350 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
351 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
352 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
353 String metadata_element_name_full = current_metadata_element.getAttribute("name");
354 // if we're only looking for fileEncoding metadata and this isn't it, skip to the next
355 if(fileEncodingOnly && !metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
356 continue;
357 }
358 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
359
360 // Ignore legacy crap
361 if (metadata_set_namespace.equals("hidden")) {
362 continue;
363 }
364
365 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
366 if (metadata_set == null) {
367 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
368 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
369 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
370 // Skip this element if we still don't have a loaded element for it
371 continue;
372 }
373
374 metadata_element_name_full = target_metadata_element_name_full;
375 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
376 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
377 }
378
379 MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(metadata_element_name_full);
380
381 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
382 // If the element doesn't exist in the metadata set, we're not interested
383 //Shaoqun modified. It needs to be added to metadata_set because the user might disable skim file
384 if (metadata_element == null) {
385 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
386 // continue;
387 }
388
389 // Square brackets need to be escaped because they are a special character in Greenstone
390 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
391 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
392 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
393
394 MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
395
396 // If there is no metadata value tree node for this value, create it
397 if (metadata_value_tree_node == null) {
398 DebugStream.println("Note: No value tree node for metadata value \"" + metadata_value_string + "\"");
399 metadata_element.addMetadataValue(metadata_value_string);
400 metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
401 }
402
403 MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node);
404 metadata_value.inheritsMetadataFromFolder(folder_metadata_inherited_from);
405 metadata_value.setIsOneFileOnlyMetadata(is_one_file_only_metadata);
406
407 // Is this accumulating metadata?
408 if (current_metadata_element.getAttribute("mode").equals("accumulate")) {
409 metadata_value.setIsAccumulatingMetadata(true);
410 }
411
412 // Add the new metadata value to the list
413 metadata_values.add(metadata_value);
414 }
415 }
416
417 return metadata_values;
418 }
419
420
421 public void removeMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
422 {
423 // If this metadata.xml file isn't the one currently loaded, load it now
424 if (loaded_file != this) {
425 // First we must save out the currently loaded file
426 saveLoadedFile();
427
428 // Parse the metadata.xml file
429 Document document = XMLTools.parseXMLFile(this);
430 if (document == null) {
431 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
432 return;
433 }
434
435 loaded_file = this;
436 loaded_file_document = document;
437
438 reEncodeFilenamesInMetadataXML(loaded_file_document);
439 }
440
441 // Determine the file's path relative to the location of the metadata.xml file
442 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
443 String file_relative_path = file_node.getURLEncodedFilePath();
444 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
445 file_relative_path = file_relative_path.replace("&", FilenameEncoding.HEX_ENTITY_AMPERSAND);
446 }
447 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
448 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
449 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
450 }
451
452 // Form a regular expression that specifies the scope of the metadata
453 String file_path_regexp;
454 if (file_relative_path.equals("")) {
455 // Special case for matching all files in the directory
456 file_path_regexp = DIRECTORY_FILENAME;
457 }
458 else {
459 // Convert the file path into a regular expression that will match it
460 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
461 }
462
463 // Find the appropriate FileSet element for this file
464 Element appropriate_fileset_element = null;
465
466 // Read all the FileSet elements in the file
467 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
468 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
469 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
470
471 // Check the FileName elements of the FileSet to see if we have a match
472 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
473 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
474 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
475 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
476
477 // Only exact matches can be extended with new metadata
478 if (current_filename_element_value.equals(file_path_regexp)) {
479 appropriate_fileset_element = current_fileset_element;
480 break;
481 }
482 }
483 }
484
485 // If no appropriate FileSet element exists the metadata isn't assigned in this metadata.xml file
486 if (appropriate_fileset_element == null) {
487 DebugStream.println("Note: No appropriate FileSet element found when removing metadata from " + this);
488 return;
489 }
490
491 // Remove each of the metadata values from the FileSet's Description element
492 for (int i = 0; i < metadata_values.size(); i++) {
493 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
494
495 // Remove any characters that are invalid in XML
496 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
497
498 // Square brackets need to be escaped because they are a special character in Greenstone
499 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
500 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
501
502 // Find the Metadata element to delete from the fileset
503 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
504 NodeList metadata_elements_nodelist = appropriate_fileset_element.getElementsByTagName(METADATA_ELEMENT);
505 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
506 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
507
508 // Check the metadata element name matches
509 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
510 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
511 // Check the metadata element value matches
512 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
513 if (current_metadata_value_string.equals(metadata_value_string)) {
514
515 // Remove this Metadata element
516 current_metadata_element.getParentNode().removeChild(current_metadata_element);
517
518 // the gs.filenameEncoding metadata is unique in that, when added, removed or
519 // changed, it must be applied on the file(name) whose metadata has been adjusted
520 if(current_metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
521
522 // metadata_value_string will hereafter be the inherited gs.FilenameEncoding
523 // metadata (if any), now that the value at this level has been removed
524 metadata_value_string = processFilenameEncoding(file_path_regexp,
525 file_node, "", true); // true only if *removing* this meta
526 }
527
528 // If there are no Metadata elements left now, remove the (empty) FileSet element
529 if (metadata_elements_nodelist.getLength() == 0) {
530 appropriate_fileset_element.getParentNode().removeChild(appropriate_fileset_element);
531 }
532
533 break;
534 }
535 }
536 }
537 }
538
539 // Remember that we've changed the file so it gets saved when a new one is loaded
540 loaded_file_changed = true;
541 }
542
543
544 public void replaceMetadata(CollectionTreeNode file_node, MetadataValue old_metadata_value, MetadataValue new_metadata_value)
545 {
546 // If this metadata.xml file isn't the one currently loaded, load it now
547 if (loaded_file != this) {
548 // First we must save out the currently loaded file
549 saveLoadedFile();
550
551 // Parse the metadata.xml file
552 Document document = XMLTools.parseXMLFile(this);
553 if (document == null) {
554 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
555 return;
556 }
557
558 loaded_file = this;
559 loaded_file_document = document;
560
561 reEncodeFilenamesInMetadataXML(loaded_file_document);
562 }
563
564 // Determine the file's path relative to the location of the metadata.xml file
565 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
566 String file_relative_path = file_node.getURLEncodedFilePath();
567 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
568 file_relative_path = file_relative_path.replace("&", FilenameEncoding.HEX_ENTITY_AMPERSAND);
569 }
570 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
571 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
572 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
573 }
574
575 // Form a regular expression that specifies the scope of the metadata
576 String file_path_regexp;
577 if (file_relative_path.equals("")) {
578 // Special case for matching all files in the directory
579 file_path_regexp = DIRECTORY_FILENAME;
580 }
581 else {
582 // Convert the file path into a regular expression that will match it
583 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
584 }
585
586 // Remove any characters that are invalid in XML
587 String old_metadata_value_string = XMLTools.removeInvalidCharacters(old_metadata_value.getFullValue());
588 String new_metadata_value_string = XMLTools.removeInvalidCharacters(new_metadata_value.getFullValue());
589
590 // Square brackets need to be escaped because they are a special character in Greenstone
591 old_metadata_value_string = old_metadata_value_string.replaceAll("\\[", "&#091;");
592 old_metadata_value_string = old_metadata_value_string.replaceAll("\\]", "&#093;");
593 new_metadata_value_string = new_metadata_value_string.replaceAll("\\[", "&#091;");
594 new_metadata_value_string = new_metadata_value_string.replaceAll("\\]", "&#093;");
595
596 // Read all the FileSet elements in the file
597 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
598 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
599 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
600 boolean current_fileset_matches = false;
601
602 // Check the FileName elements of the FileSet to see if we have a match
603 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
604 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
605 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
606 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
607
608 // Only exact matches can be edited
609 if (current_filename_element_value.equals(file_path_regexp)) {
610 current_fileset_matches = true;
611 break;
612 }
613 }
614
615 // The FileSet doesn't apply, so move onto the next one
616 if (current_fileset_matches == false) {
617 continue;
618 }
619
620 // Each metadata value is only allowed to be assigned once
621 boolean new_metadata_value_already_exists = false;
622 Element metadata_element_to_edit = null;
623
624 // Find the Metadata element to replace in the fileset
625 String metadata_element_name_full = old_metadata_value.getMetadataElement().getFullName();
626 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
627 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
628 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
629
630 // Check the metadata element name matches
631 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
632 if (!current_metadata_element_name_full.equals(metadata_element_name_full)) {
633 continue;
634 }
635
636 // Check the new metadata value doesn't already exist
637 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
638 if (current_metadata_value_string.equals(new_metadata_value_string)) {
639 new_metadata_value_already_exists = true;
640 }
641
642 // Check the metadata element value matches
643 if (current_metadata_value_string.equals(old_metadata_value_string)) {
644 metadata_element_to_edit = current_metadata_element;
645 }
646 }
647
648 // If the new metadata value already existed, remove the original value
649 if (new_metadata_value_already_exists) {
650 if(metadata_element_to_edit != null) { //?????????
651 metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit);
652 } else {
653 System.err.println("ERROR MetadataXMLFile: metadata_element_to_edit is null");
654 }
655 }
656 // Otherwise replace the old value with the new value
657 // Ensure metadata_element_to_edit isn't null (may occur when multiple files are selected)
658 else if (metadata_element_to_edit != null) {
659
660 // the gs.filenameEncoding metadata is unique in that, when added, removed or
661 // changed, it must be applied on the file(name) whose metadata has been adjusted
662 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
663 new_metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, new_metadata_value_string, false);
664 // true only if removing meta
665 }
666 XMLTools.setElementTextValue(metadata_element_to_edit, new_metadata_value_string);
667 }
668 }
669
670 // Remember that we've changed the file so it gets saved when a new one is loaded
671 loaded_file_changed = true;
672 }
673
674
675 static public void saveLoadedFile()
676 {
677 // If we have a file loaded into memory and it has been modified, save it now
678 if (loaded_file != null && loaded_file_changed == true) {
679 //System.err.println("START saveLoadedFile(), loaded_file_document:\n" + XMLTools.elementToString(loaded_file_document.getDocumentElement(), true));
680
681 XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements);
682
683 loaded_file_changed = false;
684 }
685 }
686
687 /**
688 * parseXML(metadata.xml) has the side-effect of resolving html entities.
689 * Although this is not done by the GLIEntityResolver usage in parseXML(), something
690 * in parseXML() is resolving the html entities, including those used in carefully
691 * html-entity-escaped filenames.
692 * We need to get the filenames in the DOM correct after parsing a metadata.xml file
693 * into memory, so that we have the correct filenames and so that we'll write it out correctly.
694 * Therefore, always call this method after a successful parseXML() call on a metadata.xml.
695 * @param doc is the Document where the FILENAME_ELEMENTs need to be re-encoded.
696 * At the end of this function, the doc will be modified with the re-encoded filenames.
697 *
698 * DO NOT REMOVE THE DEBUGGING STATEMENTS IN THIS FUNCTION, AS THEY'RE USEFUL
699 * FOR DEBUGGING ENCODING ISSUES TO DO WITH FILE LEVEL META ASSIGNED TO FILENAMES
700 * THAT ARE NON-ASCII OR CONTAIN +/ampersands IN THEM.
701 */
702 static private void reEncodeFilenamesInMetadataXML(Document doc) {
703
704 String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding("."); // returns the curr dir path after removing the /./ at end
705 //System.err.println("@@@ curr_directory_path: " + curr_directory_path);
706
707 //System.err.println("PARSED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
708
709 // Read all the FileSet elements in the file
710 NodeList fileset_elements_nodelist = doc.getElementsByTagName(FILESET_ELEMENT);
711 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
712 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
713
714 // get the value of all FileName elements
715 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
716 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
717 Element filename_element = (Element) filename_elements_nodelist.item(j);
718 String filename = XMLTools.getElementTextValue(filename_element);
719 if(!filename.equals(DIRECTORY_FILENAME)) {
720 // Reencode filename after parseXML() had the side-effect of decoding entities in filename elements
721
722 //System.err.println("Filename before reencoding was: " + filename);
723
724 String encoded_filename = filename;
725 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
726 encoded_filename = encoded_filename.replace("&", FilenameEncoding.HEX_ENTITY_AMPERSAND);
727 } else {
728 // Can't convert to URI with backslash-escaped chars (backslash used in regexed filename are illegal in URI object
729 // created by filenameToURLEncoding). So replace backslashes in regex with url-encoded hex-value of backslash, %5C.
730 encoded_filename = filename.replace("\\", "%5C");
731
732 // get the URL encoded filename preserving special encodings, with any curr_directory_path prefix removed
733 encoded_filename = FilenameEncoding.filenameToURLEncodingWithPrefixRemoved(encoded_filename, curr_directory_path);
734
735 // Reintrodudce the backslash characters in place of their %5C hex placeholders
736 encoded_filename = encoded_filename.replace("%5C", "\\");
737 }
738 // Update filename element in DOM
739 XMLTools.setElementTextValue(filename_element, encoded_filename);
740 //System.err.println("Filename after reencoding was: " + encoded_filename);
741 }
742 }
743 }
744 //System.err.println("RE-ENCODED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
745 }
746
747 /**
748 * Every metadata.xml file must be skimmed when a collection is opened, for three very important reasons:
749 * - To handle any non-namespaced metadata in the metadata.xml files (this is mapped and the files rewritten)
750 * - To get a complete list of the metadata elements in the collection (used in Design and Format panes)
751 * - To build complete and accurate metadata value trees (used in the Enrich pane)
752 */
753 public void skimFile()
754 {
755 boolean file_changed = false;
756
757 // Parse the metadata.xml file
758 DebugStream.println("Skimming metadata.xml file " + this + "...");
759
760 Document document = XMLTools.parseXMLFile(this);
761 if (document == null) {
762 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
763 return;
764 }
765 // Always call this method after calling parseXMLFile
766 reEncodeFilenamesInMetadataXML(document);
767
768 // Read all the Metadata elements in the file
769 HashMap target_metadata_element_name_attrs_cache = new HashMap();
770 NodeList metadata_elements_nodelist = document.getElementsByTagName(METADATA_ELEMENT);
771 for (int i = 0; i < metadata_elements_nodelist.getLength(); i++) {
772 Element current_metadata_element = (Element) metadata_elements_nodelist.item(i);
773 String metadata_element_name_full = current_metadata_element.getAttribute("name");
774 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
775
776 // Ignore legacy crap
777 if (metadata_set_namespace.equals("hidden")) {
778 continue;
779 }
780
781 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
782 if (metadata_set == null) {
783 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
784 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
785 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
786 // Skip this element if we still don't have a loaded element for it
787 continue;
788 }
789
790 // Update the metadata.xml file to have the new (namespaced) element name
791 // Instead of using current_metadata_element.setAttribute("name", target_metadata_element_name_full)
792 // we create an Attr object for each target metadata element name, and cache them
793 // This makes a *huge* difference (namespacing a metadata.xml file with 45000 metadata entries now
794 // takes 45 seconds instead of 30 minutes!) -- why is setting the value of a Node so slow?
795 Attr target_metadata_element_name_attr = (Attr) target_metadata_element_name_attrs_cache.get(target_metadata_element_name_full);
796 if (target_metadata_element_name_attr == null) {
797 target_metadata_element_name_attr = document.createAttribute("name");
798 target_metadata_element_name_attr.setValue(target_metadata_element_name_full);
799 target_metadata_element_name_attrs_cache.put(target_metadata_element_name_full, target_metadata_element_name_attr);
800 }
801
802 // Remove the old name attribute and add the new (namespaced) one
803 current_metadata_element.removeAttribute("name");
804 current_metadata_element.setAttributeNode((Attr) target_metadata_element_name_attr.cloneNode(false));
805 file_changed = true;
806
807 metadata_element_name_full = target_metadata_element_name_full;
808 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
809 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
810 }
811
812 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
813 MetadataElement metadata_element = metadata_set.getMetadataElementWithName(metadata_element_name);
814
815 // If the element doesn't exist in the metadata set, add it
816 if (metadata_element == null) {
817 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
818 }
819
820 // Square brackets need to be escaped because they are a special character in Greenstone
821 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
822 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
823 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
824
825 metadata_element.addMetadataValue(metadata_value_string);
826 }
827
828 // Rewrite the metadata.xml file if it has changed
829 if (file_changed) {
830 XMLTools.writeXMLFile(this, document);
831 }
832 }
833
834 /**
835 * The gs.filenameEncoding metadata is unique in that, when added, removed or
836 * replaced, it must be applied on the file(name) whose metadata has been
837 * adjusted.
838 * This method handles all that, given the regular expression or filepath name
839 * to match on (.* matches subdirectories), the affected fileNode, the new
840 * encoding value and whether a new encoding value has been added/an existing
841 * one has been replaced or whether the encoding metadata has been removed.
842 * The new adjusted value for the encoding metadata is returned.
843 *
844 * MetadataXMLFileManager maintains a hashmap of (URL-encoded filepaths, encoding)
845 * to allow fast access to previously assigned gs.filenameEncoding metadata (if
846 * any) for each file. This hashmap also needs to be updated, but this update
847 * is complicated by the fact that it concerns regular expressions that could
848 * affect multiple filenames.
849 */
850 public String processFilenameEncoding(String file_path_regexp, CollectionTreeNode file_node,
851 String encoding_metadata_value, boolean removingMetadata)
852 {
853 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
854 return encoding_metadata_value;
855 }
856
857 // Work out this filenode's new encoding and apply it:
858
859 if(removingMetadata) { // encoding_metadata_value = ""
860 // gs.filenameEncoding metadata being removed, work out
861 // any inherited metadata to replace it with in the meta-table
862 encoding_metadata_value = FilenameEncoding.getInheritedFilenameEncoding(
863 file_node.getURLEncodedFilePath(), file_node.getFile());
864 // should be canonical encoding already
865 }
866 else if(!encoding_metadata_value.equals("")) {
867 // if adding or replacing filename encoding,
868 // get the canonical encoding name for this alias
869 encoding_metadata_value = FilenameEncoding.canonicalEncodingName(encoding_metadata_value);
870 }
871 // Reencode the display of this filenode only as any affected
872 // childnodes will be reencoded on FileNode.refreshDescendantEncodings()
873 file_node.reencodeDisplayName(encoding_metadata_value);
874
875
876 // Whether removing or adding/replacing the file's gs.filename encoding meta,
877 // store this in the file-to-encoding map for fast access, since the map stores
878 // empty string values when no meta has been assigned at this file level.
879 // In the case of removingMetadata, the value stored will be the fallback value
880
881 String urlpath = file_node.getURLEncodedFilePath();
882 if(removingMetadata) {
883 // remove it from the map instead of inserting "", so that when folders in the collectiontree
884 // are being deleted or shifted, the removemetada (and addmetadata) calls that get fired
885 // for each affected filenodes does not cause the undesirable effect of multiple "" to be
886 // entered into the filename-to-encoding map for filepaths that no longer exist .
887 FilenameEncoding.map.remove(urlpath);
888 } else { // for adding and replacing, put the encoding into the map (also replaces any existing encoding for it)
889 FilenameEncoding.map.put(urlpath, encoding_metadata_value);
890 }
891
892 // If new folder-level metadata (or metadata for a set of files fitting a pattern) has been
893 // assigned, the file_to_encodings map will be cleared for all descendant folders and files,
894 // so that these can be re-calculated upon refreshing the visible parts of the CollectionTree.
895 // Mark the state as requiring a refresh of the CollectionTree.
896 // This next step also serves to prevent the MetadataValueTableModel from trying to update
897 // itself while a refresh (involving re-encoding of filenames of visible nodes) is in progress.
898 FilenameEncoding.setRefreshRequired(true);
899
900 return encoding_metadata_value;
901 }
902}
Note: See TracBrowser for help on using the repository browser.