source: main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java@ 33749

Last change on this file since 33749 was 33749, checked in by ak19, 4 years ago

Still on the bugfix for GLI with non-ascii filenames assigned file-level metadata. Also needed to add the Linux fix for the case of replacing and removing metadata.

  • Property svn:keywords set to Author Date Id Revision
File size: 42.2 KB
Line 
1/**
2 *############################################################################
3 * A component of the Greenstone Librarian Interface, part of the Greenstone
4 * digital library suite from the New Zealand Digital Library Project at the
5 * University of Waikato, New Zealand.
6 *
7 * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ
8 *
9 * Copyright (C) 2004 New Zealand Digital Library Project
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *############################################################################
25 */
26
27package org.greenstone.gatherer.metadata;
28
29
30import java.io.*;
31import java.util.*;
32import org.greenstone.gatherer.DebugStream;
33import org.greenstone.gatherer.collection.CollectionTreeNode;
34import org.greenstone.gatherer.util.XMLTools;
35import org.w3c.dom.*;
36
37import org.greenstone.gatherer.util.Utility;
38
39/** This class represents one metadata.xml file */
40public class MetadataXMLFile
41 extends File
42{
43 static final private String DESCRIPTION_ELEMENT = "Description";
44 static final private String DIRECTORY_FILENAME = ".*";
45 static final private String FILENAME_ELEMENT = "FileName";
46 static final private String FILESET_ELEMENT = "FileSet";
47 static final private String METADATA_ELEMENT = "Metadata";
48 static final private String[] nonEscapingElements = new String[]{FILENAME_ELEMENT};
49
50 /** Special metadata field: the filename encoding is a unique sort of metadata in
51 * that it is not just information stored with a collection file, but also needs to
52 * be applied in real-time to the collection file (to its filename) for display. */
53 static final public String FILENAME_ENCODING_METADATA = "gs.filenameEncoding";
54
55 // To speed things up a bit we keep the last accessed metadata.xml file in memory
56 static private File loaded_file = null;
57 static private Document loaded_file_document = null;
58 static private boolean loaded_file_changed = false;
59
60
/**
 * Creates a handle on the metadata.xml file located at the given path.
 * Nothing is parsed here; the file is only read when one of the metadata
 * methods finds that it is not the currently loaded file.
 *
 * @param metadata_xml_file_path the path of the metadata.xml file, passed straight to File
 */
public MetadataXMLFile(String metadata_xml_file_path)
{
    super(metadata_xml_file_path);
}
65
66
67 public void addMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
68 {
69 // If this metadata.xml file isn't the one currently loaded, load it now
70 if (loaded_file != this) {
71 // First we must save out the currently loaded file
72 saveLoadedFile();
73
74 // Parse the metadata.xml file
75 Document document = XMLTools.parseXMLFile(this);
76 if (document == null) {
77 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
78 return;
79 }
80
81 loaded_file = this;
82 loaded_file_document = document;
83 reEncodeFilenamesInMetadataXML(loaded_file_document);
84
85 }
86
87 // Determine the file's path relative to the location of the metadata.xml file
88 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
89
90 String file_relative_path = file_node.getURLEncodedFilePath();
91 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
92 file_relative_path = file_relative_path.replace("&", FilenameEncoding.HEX_ENTITY_AMPERSAND);
93 }
94 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
95
96 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
97 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
98 }
99
100 // Form a regular expression that specifies the scope of the metadata
101 String file_path_regexp;
102 if (file_relative_path.equals("")) {
103 // Special case for matching all files in the directory
104 file_path_regexp = DIRECTORY_FILENAME;
105 }
106 else {
107 // Convert the file path into a regular expression that will match it
108 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
109 }
110
111 // LEAVE THIS DEBUGGING STATEMENT IN - USEFUL TO DEBUG FILENAME ENCODING ISSUES WHEN META ASSIGNED
112 //System.err.println("MetadataXMLFile.addMetadata() Adding meta for file regexp: "
113 // + file_path_regexp + " - " + org.greenstone.gatherer.util.Utility.debugUnicodeString(file_path_regexp));
114
115 // Find the appropriate FileSet element for this file
116 Element appropriate_fileset_element = null;
117
118 // Read all the FileSet elements in the file
119 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
120 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
121 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
122
123 // Check the FileName elements of the FileSet to see if we have a match
124 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
125 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
126 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
127 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
128
129 // Only exact matches can be extended with new metadata
130 if (current_filename_element_value.equals(file_path_regexp)) {
131 appropriate_fileset_element = current_fileset_element;
132 break;
133 }
134 }
135 }
136
137 // If no appropriate FileSet element exists create a new one for this file
138 if (appropriate_fileset_element == null) {
139 DebugStream.println("Creating new FileSet element for file since none exists..."+file_path_regexp);
140 appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT);
141
142 Element new_filename_element = loaded_file_document.createElement(FILENAME_ELEMENT);
143 new_filename_element.appendChild(loaded_file_document.createTextNode(file_path_regexp));
144 appropriate_fileset_element.appendChild(new_filename_element);
145
146 Element new_description_element = loaded_file_document.createElement(DESCRIPTION_ELEMENT);
147 appropriate_fileset_element.appendChild(new_description_element);
148
149 // add the fileset element for .* at the top: especially important for
150 // non-accumulating (and override mode) meta. Other type fileset elements can be appended
151 if(file_path_regexp.equals(DIRECTORY_FILENAME)) {
152 loaded_file_document.getDocumentElement().insertBefore(appropriate_fileset_element,
153 loaded_file_document.getDocumentElement().getFirstChild());
154 } else {
155 loaded_file_document.getDocumentElement().appendChild(appropriate_fileset_element);
156 }
157 }
158
159 // Add each of the metadata values to the FileSet's Description element
160 Element description_element = (Element) appropriate_fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0);
161 for (int i = 0; i < metadata_values.size(); i++) {
162 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
163 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
164
165 // Remove any characters that are invalid in XML
166 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
167
168 // Square brackets need to be escaped because they are a special character in Greenstone
169 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
170 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
171
172 // the gs.filenameEncoding metadata is unique in that, when added, removed or
173 // changed, it must be applied on the file(name) whose metadata has been adjusted
174 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
175 metadata_value_string = processFilenameEncoding(file_path_regexp,
176 file_node, metadata_value_string, false);
177 // true only if removing meta
178 }
179
180 // Check if this piece of metadata has already been assigned to this FileSet element
181 boolean metadata_already_assigned = false;
182 NodeList metadata_elements_nodelist = description_element.getElementsByTagName(METADATA_ELEMENT);
183 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
184 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
185
186 // Check if the metadata element name matches
187 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
188 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
189 // if the metadata must not accumulate, then edit the current value
190 if (!metadata_value.isAccumulatingMetadata()) {
191 XMLTools.setNodeText(current_metadata_element, metadata_value_string);
192 metadata_already_assigned = true;
193 break;
194 }
195 // Check if the metadata element value matches
196 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
197 if (current_metadata_value_string.equals(metadata_value_string)) {
198 // Metadata already assigned
199 metadata_already_assigned = true;
200 break;
201 }
202 }
203 }
204
205 // If the piece of metadata hasn't already been assigned, add it now
206 if (!metadata_already_assigned) {
207 // Create a new Metadata element to record this metadata
208 Element new_metadata_element = loaded_file_document.createElement(METADATA_ELEMENT);
209 new_metadata_element.setAttribute("name", metadata_value.getMetadataElement().getFullName());
210 new_metadata_element.setAttribute("mode", (metadata_value.isAccumulatingMetadata() ? "accumulate" : "override"));
211 new_metadata_element.appendChild(loaded_file_document.createTextNode(metadata_value_string));
212
213 // Accumulating metadata: add at the end
214 if (metadata_value.isAccumulatingMetadata()) {
215 description_element.appendChild(new_metadata_element);
216 }
217 // Override metadata: add at the start (so it overrides inherited metadata without affecting other assigned metadata)
218 else {
219 description_element.insertBefore(new_metadata_element, description_element.getFirstChild());
220 }
221 }
222 }
223
224 // Remember that we've changed the file so it gets saved when a new one is loaded
225 loaded_file_changed = true;
226 }
227
228
229 // DO NOT REMOVE THE System.err DEBUGGING STATEMENTS FROM THIS METHOD: HELPS WITH TESTING/DEBUGGING
230 // WHEN FILE-LEVEL META IS ASSIGNED TO NON-ASCII ENCODED FILENAMES OR WITH FILENAMES CONTAINING +/ampersand
231 public ArrayList getMetadataAssignedToFile(File file, boolean fileEncodingOnly)
232 {
233 // If this metadata.xml file isn't the one currently loaded, load it now
234 if (loaded_file != this) {
235 // First we must save out the currently loaded file
236 saveLoadedFile();
237
238 // Parse the metadata.xml file
239 Document document = XMLTools.parseXMLFile(this);
240 if (document == null) {
241 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
242 return new ArrayList();
243 }
244
245 loaded_file = this;
246 loaded_file_document = document;
247
248 reEncodeFilenamesInMetadataXML(loaded_file_document);
249 }
250
251 // Determine the file's path relative to the location of the metadata.xml file
252 String file_relative_path = FilenameEncoding.fileToURLEncoding(file);
253 File metadata_xml_file_directory = getParentFile();
254 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(metadata_xml_file_directory);
255 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
256
257 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
258 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
259 }
260
261 // Build up a list of metadata assigned to this file
262 ArrayList metadata_values = new ArrayList();
263
264 // Read all the FileSet elements in the file
265 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
266 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
267 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
268 boolean current_fileset_matches = false;
269 boolean is_one_file_only_metadata = true;
270 File folder_metadata_inherited_from = null;
271
272 // Check the FileName elements of the FileSet to see if we have a match
273 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
274 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
275 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
276 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
277
278 //System.err.println("\n Original TAIL filename was: " + Utility.debugUnicodeString(file.getName()));
279 String hexdecoded_regexed_file_relative_path = FilenameEncoding.decodeStringContainingHexEntities(file_relative_path);
280 //System.err.println("Looking in meta.xml for hexdecoded_regexed_file_RELATIVE_path: " + hexdecoded_regexed_file_relative_path
281 //+ " - debug version: " + Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path));
282
283 // Does this fileset specify metadata for one file only?
284 is_one_file_only_metadata = true;
285 if (current_filename_element_value.indexOf("*") != -1 && !current_filename_element_value.equals(DIRECTORY_FILENAME)) {
286 // No, it specifies metadata for multiple files (but not all the files in the directory)
287 is_one_file_only_metadata = false;
288 }
289
290 String hexdecoded_current_filename_element_value = FilenameEncoding.decodeStringContainingHexEntities(current_filename_element_value);
291 //System.err.println(" Checking to see if it matches " + hexdecoded_current_filename_element_value + " - debug: " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
292 //System.err.println(" Checking to see if it matches " + current_filename_element_value + " - debug: " + Utility.debugUnicodeString(current_filename_element_value));
293
294 // This fileset specifies metadata for the file
295 // MetadataXMLFile.addMetadata(CollectionTreeNode, ArrayList) stored filename in uppercase hex
296 // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples
297 if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) {
298 //System.err.println(" @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path + "\n");
299 current_fileset_matches = true;
300 if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) {
301 folder_metadata_inherited_from = metadata_xml_file_directory;
302 }
303 break;
304 } //else {
305 //System.err.println( hexdecoded_regexed_file_relative_path + " does not match " + hexdecoded_current_filename_element_value);
306 //System.err.println( Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path) + " does not match " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
307 //}
308
309 // This fileset specifies metadata for the folder the file is in
310 if (hexdecoded_regexed_file_relative_path.startsWith(hexdecoded_current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) {
311 current_fileset_matches = true;
312 folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value);
313 break;
314 }
315 }
316
317 // The FileSet doesn't apply, so move onto the next one
318 if (current_fileset_matches == false) {
319 continue;
320 }
321
322 // Read all the Metadata elements in the fileset
323 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
324 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
325 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
326 String metadata_element_name_full = current_metadata_element.getAttribute("name");
327 // if we're only looking for fileEncoding metadata and this isn't it, skip to the next
328 if(fileEncodingOnly && !metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
329 continue;
330 }
331 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
332
333 // Ignore legacy crap
334 if (metadata_set_namespace.equals("hidden")) {
335 continue;
336 }
337
338 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
339 if (metadata_set == null) {
340 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
341 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
342 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
343 // Skip this element if we still don't have a loaded element for it
344 continue;
345 }
346
347 metadata_element_name_full = target_metadata_element_name_full;
348 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
349 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
350 }
351
352 MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(metadata_element_name_full);
353
354 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
355 // If the element doesn't exist in the metadata set, we're not interested
356 //Shaoqun modified. It needs to be added to metadata_set because the user might disable skim file
357 if (metadata_element == null) {
358 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
359 // continue;
360 }
361
362 // Square brackets need to be escaped because they are a special character in Greenstone
363 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
364 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
365 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
366
367 MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
368
369 // If there is no metadata value tree node for this value, create it
370 if (metadata_value_tree_node == null) {
371 DebugStream.println("Note: No value tree node for metadata value \"" + metadata_value_string + "\"");
372 metadata_element.addMetadataValue(metadata_value_string);
373 metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
374 }
375
376 MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node);
377 metadata_value.inheritsMetadataFromFolder(folder_metadata_inherited_from);
378 metadata_value.setIsOneFileOnlyMetadata(is_one_file_only_metadata);
379
380 // Is this accumulating metadata?
381 if (current_metadata_element.getAttribute("mode").equals("accumulate")) {
382 metadata_value.setIsAccumulatingMetadata(true);
383 }
384
385 // Add the new metadata value to the list
386 metadata_values.add(metadata_value);
387 }
388 }
389
390 return metadata_values;
391 }
392
393
394 public void removeMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
395 {
396 // If this metadata.xml file isn't the one currently loaded, load it now
397 if (loaded_file != this) {
398 // First we must save out the currently loaded file
399 saveLoadedFile();
400
401 // Parse the metadata.xml file
402 Document document = XMLTools.parseXMLFile(this);
403 if (document == null) {
404 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
405 return;
406 }
407
408 loaded_file = this;
409 loaded_file_document = document;
410
411 reEncodeFilenamesInMetadataXML(loaded_file_document);
412 }
413
414 // Determine the file's path relative to the location of the metadata.xml file
415 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
416 String file_relative_path = file_node.getURLEncodedFilePath();
417 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
418 file_relative_path = file_relative_path.replace("&", FilenameEncoding.HEX_ENTITY_AMPERSAND);
419 }
420 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
421 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
422 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
423 }
424
425 // Form a regular expression that specifies the scope of the metadata
426 String file_path_regexp;
427 if (file_relative_path.equals("")) {
428 // Special case for matching all files in the directory
429 file_path_regexp = DIRECTORY_FILENAME;
430 }
431 else {
432 // Convert the file path into a regular expression that will match it
433 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
434 }
435
436 // Find the appropriate FileSet element for this file
437 Element appropriate_fileset_element = null;
438
439 // Read all the FileSet elements in the file
440 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
441 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
442 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
443
444 // Check the FileName elements of the FileSet to see if we have a match
445 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
446 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
447 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
448 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
449
450 // Only exact matches can be extended with new metadata
451 if (current_filename_element_value.equals(file_path_regexp)) {
452 appropriate_fileset_element = current_fileset_element;
453 break;
454 }
455 }
456 }
457
458 // If no appropriate FileSet element exists the metadata isn't assigned in this metadata.xml file
459 if (appropriate_fileset_element == null) {
460 DebugStream.println("Note: No appropriate FileSet element found when removing metadata from " + this);
461 return;
462 }
463
464 // Remove each of the metadata values from the FileSet's Description element
465 for (int i = 0; i < metadata_values.size(); i++) {
466 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
467
468 // Remove any characters that are invalid in XML
469 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
470
471 // Square brackets need to be escaped because they are a special character in Greenstone
472 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
473 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
474
475 // Find the Metadata element to delete from the fileset
476 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
477 NodeList metadata_elements_nodelist = appropriate_fileset_element.getElementsByTagName(METADATA_ELEMENT);
478 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
479 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
480
481 // Check the metadata element name matches
482 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
483 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
484 // Check the metadata element value matches
485 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
486 if (current_metadata_value_string.equals(metadata_value_string)) {
487
488 // Remove this Metadata element
489 current_metadata_element.getParentNode().removeChild(current_metadata_element);
490
491 // the gs.filenameEncoding metadata is unique in that, when added, removed or
492 // changed, it must be applied on the file(name) whose metadata has been adjusted
493 if(current_metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
494
495 // metadata_value_string will hereafter be the inherited gs.FilenameEncoding
496 // metadata (if any), now that the value at this level has been removed
497 metadata_value_string = processFilenameEncoding(file_path_regexp,
498 file_node, "", true); // true only if *removing* this meta
499 }
500
501 // If there are no Metadata elements left now, remove the (empty) FileSet element
502 if (metadata_elements_nodelist.getLength() == 0) {
503 appropriate_fileset_element.getParentNode().removeChild(appropriate_fileset_element);
504 }
505
506 break;
507 }
508 }
509 }
510 }
511
512 // Remember that we've changed the file so it gets saved when a new one is loaded
513 loaded_file_changed = true;
514 }
515
516
517 public void replaceMetadata(CollectionTreeNode file_node, MetadataValue old_metadata_value, MetadataValue new_metadata_value)
518 {
519 // If this metadata.xml file isn't the one currently loaded, load it now
520 if (loaded_file != this) {
521 // First we must save out the currently loaded file
522 saveLoadedFile();
523
524 // Parse the metadata.xml file
525 Document document = XMLTools.parseXMLFile(this);
526 if (document == null) {
527 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
528 return;
529 }
530
531 loaded_file = this;
532 loaded_file_document = document;
533
534 reEncodeFilenamesInMetadataXML(loaded_file_document);
535 }
536
537 // Determine the file's path relative to the location of the metadata.xml file
538 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
539 String file_relative_path = file_node.getURLEncodedFilePath();
540 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
541 file_relative_path = file_relative_path.replace("&", FilenameEncoding.HEX_ENTITY_AMPERSAND);
542 }
543 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
544 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
545 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
546 }
547
548 // Form a regular expression that specifies the scope of the metadata
549 String file_path_regexp;
550 if (file_relative_path.equals("")) {
551 // Special case for matching all files in the directory
552 file_path_regexp = DIRECTORY_FILENAME;
553 }
554 else {
555 // Convert the file path into a regular expression that will match it
556 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
557 }
558
559 // Remove any characters that are invalid in XML
560 String old_metadata_value_string = XMLTools.removeInvalidCharacters(old_metadata_value.getFullValue());
561 String new_metadata_value_string = XMLTools.removeInvalidCharacters(new_metadata_value.getFullValue());
562
563 // Square brackets need to be escaped because they are a special character in Greenstone
564 old_metadata_value_string = old_metadata_value_string.replaceAll("\\[", "&#091;");
565 old_metadata_value_string = old_metadata_value_string.replaceAll("\\]", "&#093;");
566 new_metadata_value_string = new_metadata_value_string.replaceAll("\\[", "&#091;");
567 new_metadata_value_string = new_metadata_value_string.replaceAll("\\]", "&#093;");
568
569 // Read all the FileSet elements in the file
570 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
571 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
572 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
573 boolean current_fileset_matches = false;
574
575 // Check the FileName elements of the FileSet to see if we have a match
576 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
577 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
578 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
579 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
580
581 // Only exact matches can be edited
582 if (current_filename_element_value.equals(file_path_regexp)) {
583 current_fileset_matches = true;
584 break;
585 }
586 }
587
588 // The FileSet doesn't apply, so move onto the next one
589 if (current_fileset_matches == false) {
590 continue;
591 }
592
593 // Each metadata value is only allowed to be assigned once
594 boolean new_metadata_value_already_exists = false;
595 Element metadata_element_to_edit = null;
596
597 // Find the Metadata element to replace in the fileset
598 String metadata_element_name_full = old_metadata_value.getMetadataElement().getFullName();
599 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
600 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
601 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
602
603 // Check the metadata element name matches
604 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
605 if (!current_metadata_element_name_full.equals(metadata_element_name_full)) {
606 continue;
607 }
608
609 // Check the new metadata value doesn't already exist
610 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
611 if (current_metadata_value_string.equals(new_metadata_value_string)) {
612 new_metadata_value_already_exists = true;
613 }
614
615 // Check the metadata element value matches
616 if (current_metadata_value_string.equals(old_metadata_value_string)) {
617 metadata_element_to_edit = current_metadata_element;
618 }
619 }
620
621 // If the new metadata value already existed, remove the original value
622 if (new_metadata_value_already_exists) {
623 if(metadata_element_to_edit != null) { //?????????
624 metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit);
625 } else {
626 System.err.println("ERROR MetadataXMLFile: metadata_element_to_edit is null");
627 }
628 }
629 // Otherwise replace the old value with the new value
630 // Ensure metadata_element_to_edit isn't null (may occur when multiple files are selected)
631 else if (metadata_element_to_edit != null) {
632
633 // the gs.filenameEncoding metadata is unique in that, when added, removed or
634 // changed, it must be applied on the file(name) whose metadata has been adjusted
635 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
636 new_metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, new_metadata_value_string, false);
637 // true only if removing meta
638 }
639 XMLTools.setElementTextValue(metadata_element_to_edit, new_metadata_value_string);
640 }
641 }
642
643 // Remember that we've changed the file so it gets saved when a new one is loaded
644 loaded_file_changed = true;
645 }
646
647
648 static public void saveLoadedFile()
649 {
650 // If we have a file loaded into memory and it has been modified, save it now
651 if (loaded_file != null && loaded_file_changed == true) {
652 //System.err.println("START saveLoadedFile(), loaded_file_document:\n" + XMLTools.elementToString(loaded_file_document.getDocumentElement(), true));
653
654 XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements);
655
656 loaded_file_changed = false;
657 }
658 }
659
660 /**
661 * parseXML(metadata.xml) has the side-effect of resolving html entities.
662 * Although this is not done by the GLIEntityResolver usage in parseXML(), something
663 * in parseXML() is resolving the html entities, including those used in carefully
664 * html-entity-escaped filenames.
665 * We need to get the filenames in the DOM correct after parsing a metadata.xml file
666 * into memory, so that we have the correct filenames and so that we'll write it out correctly.
667 * Therefore, always call this method after a successful parseXML() call on a metadata.xml.
668 * @param doc is the Document where the FILENAME_ELEMENTs need to be re-encoded.
669 * At the end of this function, the doc will be modified with the re-encoded filenames.
670 *
671 * DO NOT REMOVE THE DEBUGGING STATEMENTS IN THIS FUNCTION, AS THEY'RE USEFUL
672 * FOR DEBUGGING ENCODING ISSUES TO DO WITH FILE LEVEL META ASSIGNED TO FILENAMES
673 * THAT ARE NON-ASCII OR CONTAIN +/ampersands IN THEM.
674 */
675 static private void reEncodeFilenamesInMetadataXML(Document doc) {
676
677 String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding("."); // returns the curr dir path after removing the /./ at end
678 //System.err.println("@@@ curr_directory_path: " + curr_directory_path);
679
680 //System.err.println("PARSED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
681
682 // Read all the FileSet elements in the file
683 NodeList fileset_elements_nodelist = doc.getElementsByTagName(FILESET_ELEMENT);
684 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
685 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
686
687 // get the value of all FileName elements
688 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
689 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
690 Element filename_element = (Element) filename_elements_nodelist.item(j);
691 String filename = XMLTools.getElementTextValue(filename_element);
692 if(!filename.equals(DIRECTORY_FILENAME)) {
693 // Reencode filename after parseXML() had the side-effect of decoding entities in filename elements
694
695 //System.err.println("Filename before reencoding was: " + filename);
696
697 String encoded_filename = filename;
698 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
699 encoded_filename = encoded_filename.replace("&", FilenameEncoding.HEX_ENTITY_AMPERSAND);
700 } else {
701 // Can't convert to URI with backslash-escaped chars (backslash used in regexed filename are illegal in URI object
702 // created by filenameToURLEncoding). So replace backslashes in regex with url-encoded hex-value of backslash, %5C.
703 encoded_filename = filename.replace("\\", "%5C");
704
705 // get the URL encoded filename preserving special encodings, with any curr_directory_path prefix removed
706 encoded_filename = FilenameEncoding.filenameToURLEncodingWithPrefixRemoved(encoded_filename, curr_directory_path);
707
708 // Reintrodudce the backslash characters in place of their %5C hex placeholders
709 encoded_filename = encoded_filename.replace("%5C", "\\");
710 }
711 // Update filename element in DOM
712 XMLTools.setElementTextValue(filename_element, encoded_filename);
713 //System.err.println("Filename after reencoding was: " + encoded_filename);
714 }
715 }
716 }
717 //System.err.println("RE-ENCODED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
718 }
719
720 /**
721 * Every metadata.xml file must be skimmed when a collection is opened, for three very important reasons:
722 * - To handle any non-namespaced metadata in the metadata.xml files (this is mapped and the files rewritten)
723 * - To get a complete list of the metadata elements in the collection (used in Design and Format panes)
724 * - To build complete and accurate metadata value trees (used in the Enrich pane)
725 */
726 public void skimFile()
727 {
728 boolean file_changed = false;
729
730 // Parse the metadata.xml file
731 DebugStream.println("Skimming metadata.xml file " + this + "...");
732
733 Document document = XMLTools.parseXMLFile(this);
734 if (document == null) {
735 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
736 return;
737 }
738 // Always call this method after calling parseXMLFile
739 reEncodeFilenamesInMetadataXML(document);
740
741 // Read all the Metadata elements in the file
742 HashMap target_metadata_element_name_attrs_cache = new HashMap();
743 NodeList metadata_elements_nodelist = document.getElementsByTagName(METADATA_ELEMENT);
744 for (int i = 0; i < metadata_elements_nodelist.getLength(); i++) {
745 Element current_metadata_element = (Element) metadata_elements_nodelist.item(i);
746 String metadata_element_name_full = current_metadata_element.getAttribute("name");
747 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
748
749 // Ignore legacy crap
750 if (metadata_set_namespace.equals("hidden")) {
751 continue;
752 }
753
754 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
755 if (metadata_set == null) {
756 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
757 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
758 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
759 // Skip this element if we still don't have a loaded element for it
760 continue;
761 }
762
763 // Update the metadata.xml file to have the new (namespaced) element name
764 // Instead of using current_metadata_element.setAttribute("name", target_metadata_element_name_full)
765 // we create an Attr object for each target metadata element name, and cache them
766 // This makes a *huge* difference (namespacing a metadata.xml file with 45000 metadata entries now
767 // takes 45 seconds instead of 30 minutes!) -- why is setting the value of a Node so slow?
768 Attr target_metadata_element_name_attr = (Attr) target_metadata_element_name_attrs_cache.get(target_metadata_element_name_full);
769 if (target_metadata_element_name_attr == null) {
770 target_metadata_element_name_attr = document.createAttribute("name");
771 target_metadata_element_name_attr.setValue(target_metadata_element_name_full);
772 target_metadata_element_name_attrs_cache.put(target_metadata_element_name_full, target_metadata_element_name_attr);
773 }
774
775 // Remove the old name attribute and add the new (namespaced) one
776 current_metadata_element.removeAttribute("name");
777 current_metadata_element.setAttributeNode((Attr) target_metadata_element_name_attr.cloneNode(false));
778 file_changed = true;
779
780 metadata_element_name_full = target_metadata_element_name_full;
781 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
782 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
783 }
784
785 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
786 MetadataElement metadata_element = metadata_set.getMetadataElementWithName(metadata_element_name);
787
788 // If the element doesn't exist in the metadata set, add it
789 if (metadata_element == null) {
790 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
791 }
792
793 // Square brackets need to be escaped because they are a special character in Greenstone
794 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
795 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
796 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
797
798 metadata_element.addMetadataValue(metadata_value_string);
799 }
800
801 // Rewrite the metadata.xml file if it has changed
802 if (file_changed) {
803 XMLTools.writeXMLFile(this, document);
804 }
805 }
806
807 /**
808 * The gs.filenameEncoding metadata is unique in that, when added, removed or
809 * replaced, it must be applied on the file(name) whose metadata has been
810 * adjusted.
811 * This method handles all that, given the regular expression or filepath name
812 * to match on (.* matches subdirectories), the affected fileNode, the new
813 * encoding value and whether a new encoding value has been added/an existing
814 * one has been replaced or whether the encoding metadata has been removed.
815 * The new adjusted value for the encoding metadata is returned.
816 *
817 * MetadataXMLFileManager maintains a hashmap of (URL-encoded filepaths, encoding)
818 * to allow fast access to previously assigned gs.filenameEncoding metadata (if
819 * any) for each file. This hashmap also needs to be updated, but this update
820 * is complicated by the fact that it concerns regular expressions that could
821 * affect multiple filenames.
822 */
823 public String processFilenameEncoding(String file_path_regexp, CollectionTreeNode file_node,
824 String encoding_metadata_value, boolean removingMetadata)
825 {
826 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
827 return encoding_metadata_value;
828 }
829
830 // Work out this filenode's new encoding and apply it:
831
832 if(removingMetadata) { // encoding_metadata_value = ""
833 // gs.filenameEncoding metadata being removed, work out
834 // any inherited metadata to replace it with in the meta-table
835 encoding_metadata_value = FilenameEncoding.getInheritedFilenameEncoding(
836 file_node.getURLEncodedFilePath(), file_node.getFile());
837 // should be canonical encoding already
838 }
839 else if(!encoding_metadata_value.equals("")) {
840 // if adding or replacing filename encoding,
841 // get the canonical encoding name for this alias
842 encoding_metadata_value = FilenameEncoding.canonicalEncodingName(encoding_metadata_value);
843 }
844 // Reencode the display of this filenode only as any affected
845 // childnodes will be reencoded on FileNode.refreshDescendantEncodings()
846 file_node.reencodeDisplayName(encoding_metadata_value);
847
848
849 // Whether removing or adding/replacing the file's gs.filename encoding meta,
850 // store this in the file-to-encoding map for fast access, since the map stores
851 // empty string values when no meta has been assigned at this file level.
852 // In the case of removingMetadata, the value stored will be the fallback value
853
854 String urlpath = file_node.getURLEncodedFilePath();
855 if(removingMetadata) {
856 // remove it from the map instead of inserting "", so that when folders in the collectiontree
857 // are being deleted or shifted, the removemetada (and addmetadata) calls that get fired
858 // for each affected filenodes does not cause the undesirable effect of multiple "" to be
859 // entered into the filename-to-encoding map for filepaths that no longer exist .
860 FilenameEncoding.map.remove(urlpath);
861 } else { // for adding and replacing, put the encoding into the map (also replaces any existing encoding for it)
862 FilenameEncoding.map.put(urlpath, encoding_metadata_value);
863 }
864
865 // If new folder-level metadata (or metadata for a set of files fitting a pattern) has been
866 // assigned, the file_to_encodings map will be cleared for all descendant folders and files,
867 // so that these can be re-calculated upon refreshing the visible parts of the CollectionTree.
868 // Mark the state as requiring a refresh of the CollectionTree.
869 // This next step also serves to prevent the MetadataValueTableModel from trying to update
870 // itself while a refresh (involving re-encoding of filenames of visible nodes) is in progress.
871 FilenameEncoding.setRefreshRequired(true);
872
873 return encoding_metadata_value;
874 }
875}
Note: See TracBrowser for help on using the repository browser.