source: main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java@ 33748

Last change on this file since 33748 was 33748, checked in by ak19, 4 years ago

Linux bugfixes to recent commits to do with getting file-level meta assigned to non-ascii filenames or filenames containing plus/ampersand signs to work. Cumulative past commits were sufficient for fixing these issues on Windows. All those changes plus the current ones get it all working on Linux too.

  • Property svn:keywords set to Author Date Id Revision
File size: 41.8 KB
Line 
1/**
2 *############################################################################
3 * A component of the Greenstone Librarian Interface, part of the Greenstone
4 * digital library suite from the New Zealand Digital Library Project at the
5 * University of Waikato, New Zealand.
6 *
7 * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ
8 *
9 * Copyright (C) 2004 New Zealand Digital Library Project
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *############################################################################
25 */
26
27package org.greenstone.gatherer.metadata;
28
29
30import java.io.*;
31import java.util.*;
32import org.greenstone.gatherer.DebugStream;
33import org.greenstone.gatherer.collection.CollectionTreeNode;
34import org.greenstone.gatherer.util.XMLTools;
35import org.w3c.dom.*;
36
37import org.greenstone.gatherer.util.Utility;
38
39/** This class represents one metadata.xml file */
40public class MetadataXMLFile
41 extends File
42{
43 static final private String DESCRIPTION_ELEMENT = "Description";
44 static final private String DIRECTORY_FILENAME = ".*";
45 static final private String FILENAME_ELEMENT = "FileName";
46 static final private String FILESET_ELEMENT = "FileSet";
47 static final private String METADATA_ELEMENT = "Metadata";
48 static final private String[] nonEscapingElements = new String[]{FILENAME_ELEMENT};
49
50 /** Special metadata field: the filename encoding is a unique sort of metadata in
51 * that it is not just information stored with a collection file, but also needs to
52 * be applied in real-time to the collection file (to its filename) for display. */
53 static final public String FILENAME_ENCODING_METADATA = "gs.filenameEncoding";
54
55 // To speed things up a bit we keep the last accessed metadata.xml file in memory
56 static private File loaded_file = null;
57 static private Document loaded_file_document = null;
58 static private boolean loaded_file_changed = false;
59
60
/**
 * Creates a handle on the metadata.xml file stored at the given path.
 *
 * @param metadata_xml_file_path the path of the metadata.xml file, handed
 *        unchanged to the {@link File} constructor
 */
public MetadataXMLFile(final String metadata_xml_file_path)
{
    super(metadata_xml_file_path);
}
65
66
67 public void addMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
68 {
69 // If this metadata.xml file isn't the one currently loaded, load it now
70 if (loaded_file != this) {
71 // First we must save out the currently loaded file
72 saveLoadedFile();
73
74 // Parse the metadata.xml file
75 Document document = XMLTools.parseXMLFile(this);
76 if (document == null) {
77 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
78 return;
79 }
80
81 loaded_file = this;
82 loaded_file_document = document;
83 reEncodeFilenamesInMetadataXML(loaded_file_document);
84
85 }
86
87 // Determine the file's path relative to the location of the metadata.xml file
88 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
89
90 String file_relative_path = file_node.getURLEncodedFilePath();
91 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
92 file_relative_path = file_relative_path.replace("&", FilenameEncoding.HEX_ENTITY_AMPERSAND);
93 }
94 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
95
96 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
97 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
98 }
99
100 // Form a regular expression that specifies the scope of the metadata
101 String file_path_regexp;
102 if (file_relative_path.equals("")) {
103 // Special case for matching all files in the directory
104 file_path_regexp = DIRECTORY_FILENAME;
105 }
106 else {
107 // Convert the file path into a regular expression that will match it
108 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
109 }
110
111 // LEAVE THIS DEBUGGING STATEMENT IN - USEFUL TO DEBUG FILENAME ENCODING ISSUES WHEN META ASSIGNED
112 //System.err.println("MetadataXMLFile.addMetadata() Adding meta for file regexp: "
113 // + file_path_regexp + " - " + org.greenstone.gatherer.util.Utility.debugUnicodeString(file_path_regexp));
114
115 // Find the appropriate FileSet element for this file
116 Element appropriate_fileset_element = null;
117
118 // Read all the FileSet elements in the file
119 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
120 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
121 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
122
123 // Check the FileName elements of the FileSet to see if we have a match
124 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
125 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
126 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
127 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
128
129 // Only exact matches can be extended with new metadata
130 if (current_filename_element_value.equals(file_path_regexp)) {
131 appropriate_fileset_element = current_fileset_element;
132 break;
133 }
134 }
135 }
136
137 // If no appropriate FileSet element exists create a new one for this file
138 if (appropriate_fileset_element == null) {
139 DebugStream.println("Creating new FileSet element for file since none exists..."+file_path_regexp);
140 appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT);
141
142 Element new_filename_element = loaded_file_document.createElement(FILENAME_ELEMENT);
143 new_filename_element.appendChild(loaded_file_document.createTextNode(file_path_regexp));
144 appropriate_fileset_element.appendChild(new_filename_element);
145
146 Element new_description_element = loaded_file_document.createElement(DESCRIPTION_ELEMENT);
147 appropriate_fileset_element.appendChild(new_description_element);
148
149 // add the fileset element for .* at the top: especially important for
150 // non-accumulating (and override mode) meta. Other type fileset elements can be appended
151 if(file_path_regexp.equals(DIRECTORY_FILENAME)) {
152 loaded_file_document.getDocumentElement().insertBefore(appropriate_fileset_element,
153 loaded_file_document.getDocumentElement().getFirstChild());
154 } else {
155 loaded_file_document.getDocumentElement().appendChild(appropriate_fileset_element);
156 }
157 }
158
159 // Add each of the metadata values to the FileSet's Description element
160 Element description_element = (Element) appropriate_fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0);
161 for (int i = 0; i < metadata_values.size(); i++) {
162 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
163 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
164
165 // Remove any characters that are invalid in XML
166 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
167
168 // Square brackets need to be escaped because they are a special character in Greenstone
169 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
170 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
171
172 // the gs.filenameEncoding metadata is unique in that, when added, removed or
173 // changed, it must be applied on the file(name) whose metadata has been adjusted
174 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
175 metadata_value_string = processFilenameEncoding(file_path_regexp,
176 file_node, metadata_value_string, false);
177 // true only if removing meta
178 }
179
180 // Check if this piece of metadata has already been assigned to this FileSet element
181 boolean metadata_already_assigned = false;
182 NodeList metadata_elements_nodelist = description_element.getElementsByTagName(METADATA_ELEMENT);
183 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
184 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
185
186 // Check if the metadata element name matches
187 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
188 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
189 // if the metadata must not accumulate, then edit the current value
190 if (!metadata_value.isAccumulatingMetadata()) {
191 XMLTools.setNodeText(current_metadata_element, metadata_value_string);
192 metadata_already_assigned = true;
193 break;
194 }
195 // Check if the metadata element value matches
196 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
197 if (current_metadata_value_string.equals(metadata_value_string)) {
198 // Metadata already assigned
199 metadata_already_assigned = true;
200 break;
201 }
202 }
203 }
204
205 // If the piece of metadata hasn't already been assigned, add it now
206 if (!metadata_already_assigned) {
207 // Create a new Metadata element to record this metadata
208 Element new_metadata_element = loaded_file_document.createElement(METADATA_ELEMENT);
209 new_metadata_element.setAttribute("name", metadata_value.getMetadataElement().getFullName());
210 new_metadata_element.setAttribute("mode", (metadata_value.isAccumulatingMetadata() ? "accumulate" : "override"));
211 new_metadata_element.appendChild(loaded_file_document.createTextNode(metadata_value_string));
212
213 // Accumulating metadata: add at the end
214 if (metadata_value.isAccumulatingMetadata()) {
215 description_element.appendChild(new_metadata_element);
216 }
217 // Override metadata: add at the start (so it overrides inherited metadata without affecting other assigned metadata)
218 else {
219 description_element.insertBefore(new_metadata_element, description_element.getFirstChild());
220 }
221 }
222 }
223
224 // Remember that we've changed the file so it gets saved when a new one is loaded
225 loaded_file_changed = true;
226 }
227
228
229 // DO NOT REMOVE THE System.err DEBUGGING STATEMENTS FROM THIS METHOD: HELPS WITH TESTING/DEBUGGING
230 // WHEN FILE-LEVEL META IS ASSIGNED TO NON-ASCII ENCODED FILENAMES OR WITH FILENAMES CONTAINING +/ampersand
231 public ArrayList getMetadataAssignedToFile(File file, boolean fileEncodingOnly)
232 {
233 // If this metadata.xml file isn't the one currently loaded, load it now
234 if (loaded_file != this) {
235 // First we must save out the currently loaded file
236 saveLoadedFile();
237
238 // Parse the metadata.xml file
239 Document document = XMLTools.parseXMLFile(this);
240 if (document == null) {
241 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
242 return new ArrayList();
243 }
244
245 loaded_file = this;
246 loaded_file_document = document;
247
248 reEncodeFilenamesInMetadataXML(loaded_file_document);
249 }
250
251 // Determine the file's path relative to the location of the metadata.xml file
252 String file_relative_path = FilenameEncoding.fileToURLEncoding(file);
253 File metadata_xml_file_directory = getParentFile();
254 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(metadata_xml_file_directory);
255 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
256
257 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
258 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
259 }
260
261 // Build up a list of metadata assigned to this file
262 ArrayList metadata_values = new ArrayList();
263
264 // Read all the FileSet elements in the file
265 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
266 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
267 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
268 boolean current_fileset_matches = false;
269 boolean is_one_file_only_metadata = true;
270 File folder_metadata_inherited_from = null;
271
272 // Check the FileName elements of the FileSet to see if we have a match
273 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
274 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
275 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
276 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
277
278 //System.err.println("\n Original TAIL filename was: " + Utility.debugUnicodeString(file.getName()));
279 String hexdecoded_regexed_file_relative_path = FilenameEncoding.decodeStringContainingHexEntities(file_relative_path);
280 //System.err.println("Looking in meta.xml for hexdecoded_regexed_file_RELATIVE_path: " + hexdecoded_regexed_file_relative_path
281 //+ " - debug version: " + Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path));
282
283 // Does this fileset specify metadata for one file only?
284 is_one_file_only_metadata = true;
285 if (current_filename_element_value.indexOf("*") != -1 && !current_filename_element_value.equals(DIRECTORY_FILENAME)) {
286 // No, it specifies metadata for multiple files (but not all the files in the directory)
287 is_one_file_only_metadata = false;
288 }
289
290 String hexdecoded_current_filename_element_value = FilenameEncoding.decodeStringContainingHexEntities(current_filename_element_value);
291 //System.err.println(" Checking to see if it matches " + hexdecoded_current_filename_element_value + " - debug: " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
292 //System.err.println(" Checking to see if it matches " + current_filename_element_value + " - debug: " + Utility.debugUnicodeString(current_filename_element_value));
293
294 // This fileset specifies metadata for the file
295 // MetadataXMLFile.addMetadata(CollectionTreeNode, ArrayList) stored filename in uppercase hex
296 // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples
297 if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) {
298 //System.err.println(" @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path + "\n");
299 current_fileset_matches = true;
300 if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) {
301 folder_metadata_inherited_from = metadata_xml_file_directory;
302 }
303 break;
304 } //else {
305 //System.err.println( hexdecoded_regexed_file_relative_path + " does not match " + hexdecoded_current_filename_element_value);
306 //System.err.println( Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path) + " does not match " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
307 //}
308
309 // This fileset specifies metadata for the folder the file is in
310 if (hexdecoded_regexed_file_relative_path.startsWith(hexdecoded_current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) {
311 current_fileset_matches = true;
312 folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value);
313 break;
314 }
315 }
316
317 // The FileSet doesn't apply, so move onto the next one
318 if (current_fileset_matches == false) {
319 continue;
320 }
321
322 // Read all the Metadata elements in the fileset
323 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
324 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
325 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
326 String metadata_element_name_full = current_metadata_element.getAttribute("name");
327 // if we're only looking for fileEncoding metadata and this isn't it, skip to the next
328 if(fileEncodingOnly && !metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
329 continue;
330 }
331 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
332
333 // Ignore legacy crap
334 if (metadata_set_namespace.equals("hidden")) {
335 continue;
336 }
337
338 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
339 if (metadata_set == null) {
340 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
341 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
342 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
343 // Skip this element if we still don't have a loaded element for it
344 continue;
345 }
346
347 metadata_element_name_full = target_metadata_element_name_full;
348 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
349 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
350 }
351
352 MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(metadata_element_name_full);
353
354 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
355 // If the element doesn't exist in the metadata set, we're not interested
356 //Shaoqun modified. It needs to be added to metadata_set because the user might disable skim file
357 if (metadata_element == null) {
358 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
359 // continue;
360 }
361
362 // Square brackets need to be escaped because they are a special character in Greenstone
363 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
364 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
365 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
366
367 MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
368
369 // If there is no metadata value tree node for this value, create it
370 if (metadata_value_tree_node == null) {
371 DebugStream.println("Note: No value tree node for metadata value \"" + metadata_value_string + "\"");
372 metadata_element.addMetadataValue(metadata_value_string);
373 metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
374 }
375
376 MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node);
377 metadata_value.inheritsMetadataFromFolder(folder_metadata_inherited_from);
378 metadata_value.setIsOneFileOnlyMetadata(is_one_file_only_metadata);
379
380 // Is this accumulating metadata?
381 if (current_metadata_element.getAttribute("mode").equals("accumulate")) {
382 metadata_value.setIsAccumulatingMetadata(true);
383 }
384
385 // Add the new metadata value to the list
386 metadata_values.add(metadata_value);
387 }
388 }
389
390 return metadata_values;
391 }
392
393
394 public void removeMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
395 {
396 // If this metadata.xml file isn't the one currently loaded, load it now
397 if (loaded_file != this) {
398 // First we must save out the currently loaded file
399 saveLoadedFile();
400
401 // Parse the metadata.xml file
402 Document document = XMLTools.parseXMLFile(this);
403 if (document == null) {
404 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
405 return;
406 }
407
408 loaded_file = this;
409 loaded_file_document = document;
410
411 reEncodeFilenamesInMetadataXML(loaded_file_document);
412 }
413
414 // Determine the file's path relative to the location of the metadata.xml file
415 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
416 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
417 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
418 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
419 }
420
421 // Form a regular expression that specifies the scope of the metadata
422 String file_path_regexp;
423 if (file_relative_path.equals("")) {
424 // Special case for matching all files in the directory
425 file_path_regexp = DIRECTORY_FILENAME;
426 }
427 else {
428 // Convert the file path into a regular expression that will match it
429 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
430 }
431
432 // Find the appropriate FileSet element for this file
433 Element appropriate_fileset_element = null;
434
435 // Read all the FileSet elements in the file
436 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
437 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
438 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
439
440 // Check the FileName elements of the FileSet to see if we have a match
441 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
442 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
443 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
444 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
445
446 // Only exact matches can be extended with new metadata
447 if (current_filename_element_value.equals(file_path_regexp)) {
448 appropriate_fileset_element = current_fileset_element;
449 break;
450 }
451 }
452 }
453
454 // If no appropriate FileSet element exists the metadata isn't assigned in this metadata.xml file
455 if (appropriate_fileset_element == null) {
456 DebugStream.println("Note: No appropriate FileSet element found when removing metadata from " + this);
457 return;
458 }
459
460 // Remove each of the metadata values from the FileSet's Description element
461 for (int i = 0; i < metadata_values.size(); i++) {
462 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
463
464 // Remove any characters that are invalid in XML
465 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
466
467 // Square brackets need to be escaped because they are a special character in Greenstone
468 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
469 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
470
471 // Find the Metadata element to delete from the fileset
472 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
473 NodeList metadata_elements_nodelist = appropriate_fileset_element.getElementsByTagName(METADATA_ELEMENT);
474 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
475 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
476
477 // Check the metadata element name matches
478 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
479 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
480 // Check the metadata element value matches
481 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
482 if (current_metadata_value_string.equals(metadata_value_string)) {
483
484 // Remove this Metadata element
485 current_metadata_element.getParentNode().removeChild(current_metadata_element);
486
487 // the gs.filenameEncoding metadata is unique in that, when added, removed or
488 // changed, it must be applied on the file(name) whose metadata has been adjusted
489 if(current_metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
490
491 // metadata_value_string will hereafter be the inherited gs.FilenameEncoding
492 // metadata (if any), now that the value at this level has been removed
493 metadata_value_string = processFilenameEncoding(file_path_regexp,
494 file_node, "", true); // true only if *removing* this meta
495 }
496
497 // If there are no Metadata elements left now, remove the (empty) FileSet element
498 if (metadata_elements_nodelist.getLength() == 0) {
499 appropriate_fileset_element.getParentNode().removeChild(appropriate_fileset_element);
500 }
501
502 break;
503 }
504 }
505 }
506 }
507
508 // Remember that we've changed the file so it gets saved when a new one is loaded
509 loaded_file_changed = true;
510 }
511
512
513 public void replaceMetadata(CollectionTreeNode file_node, MetadataValue old_metadata_value, MetadataValue new_metadata_value)
514 {
515 // If this metadata.xml file isn't the one currently loaded, load it now
516 if (loaded_file != this) {
517 // First we must save out the currently loaded file
518 saveLoadedFile();
519
520 // Parse the metadata.xml file
521 Document document = XMLTools.parseXMLFile(this);
522 if (document == null) {
523 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
524 return;
525 }
526
527 loaded_file = this;
528 loaded_file_document = document;
529
530 reEncodeFilenamesInMetadataXML(loaded_file_document);
531 }
532
533 // Determine the file's path relative to the location of the metadata.xml file
534 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
535 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
536 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
537 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
538 }
539
540 // Form a regular expression that specifies the scope of the metadata
541 String file_path_regexp;
542 if (file_relative_path.equals("")) {
543 // Special case for matching all files in the directory
544 file_path_regexp = DIRECTORY_FILENAME;
545 }
546 else {
547 // Convert the file path into a regular expression that will match it
548 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
549 }
550
551 // Remove any characters that are invalid in XML
552 String old_metadata_value_string = XMLTools.removeInvalidCharacters(old_metadata_value.getFullValue());
553 String new_metadata_value_string = XMLTools.removeInvalidCharacters(new_metadata_value.getFullValue());
554
555 // Square brackets need to be escaped because they are a special character in Greenstone
556 old_metadata_value_string = old_metadata_value_string.replaceAll("\\[", "&#091;");
557 old_metadata_value_string = old_metadata_value_string.replaceAll("\\]", "&#093;");
558 new_metadata_value_string = new_metadata_value_string.replaceAll("\\[", "&#091;");
559 new_metadata_value_string = new_metadata_value_string.replaceAll("\\]", "&#093;");
560
561 // Read all the FileSet elements in the file
562 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
563 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
564 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
565 boolean current_fileset_matches = false;
566
567 // Check the FileName elements of the FileSet to see if we have a match
568 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
569 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
570 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
571 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
572
573 // Only exact matches can be edited
574 if (current_filename_element_value.equals(file_path_regexp)) {
575 current_fileset_matches = true;
576 break;
577 }
578 }
579
580 // The FileSet doesn't apply, so move onto the next one
581 if (current_fileset_matches == false) {
582 continue;
583 }
584
585 // Each metadata value is only allowed to be assigned once
586 boolean new_metadata_value_already_exists = false;
587 Element metadata_element_to_edit = null;
588
589 // Find the Metadata element to replace in the fileset
590 String metadata_element_name_full = old_metadata_value.getMetadataElement().getFullName();
591 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
592 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
593 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
594
595 // Check the metadata element name matches
596 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
597 if (!current_metadata_element_name_full.equals(metadata_element_name_full)) {
598 continue;
599 }
600
601 // Check the new metadata value doesn't already exist
602 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
603 if (current_metadata_value_string.equals(new_metadata_value_string)) {
604 new_metadata_value_already_exists = true;
605 }
606
607 // Check the metadata element value matches
608 if (current_metadata_value_string.equals(old_metadata_value_string)) {
609 metadata_element_to_edit = current_metadata_element;
610 }
611 }
612
613 // If the new metadata value already existed, remove the original value
614 if (new_metadata_value_already_exists) {
615 if(metadata_element_to_edit != null) { //?????????
616 metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit);
617 } else {
618 System.err.println("ERROR MetadataXMLFile: metadata_element_to_edit is null");
619 }
620 }
621 // Otherwise replace the old value with the new value
622 // Ensure metadata_element_to_edit isn't null (may occur when multiple files are selected)
623 else if (metadata_element_to_edit != null) {
624
625 // the gs.filenameEncoding metadata is unique in that, when added, removed or
626 // changed, it must be applied on the file(name) whose metadata has been adjusted
627 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
628 new_metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, new_metadata_value_string, false);
629 // true only if removing meta
630 }
631 XMLTools.setElementTextValue(metadata_element_to_edit, new_metadata_value_string);
632 }
633 }
634
635 // Remember that we've changed the file so it gets saved when a new one is loaded
636 loaded_file_changed = true;
637 }
638
639
640 static public void saveLoadedFile()
641 {
642 // If we have a file loaded into memory and it has been modified, save it now
643 if (loaded_file != null && loaded_file_changed == true) {
644 //System.err.println("START saveLoadedFile(), loaded_file_document:\n" + XMLTools.elementToString(loaded_file_document.getDocumentElement(), true));
645
646 XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements);
647
648 loaded_file_changed = false;
649 }
650 }
651
652 /**
653 * parseXML(metadata.xml) has the side-effect of resolving html entities.
654 * Although this is not done by the GLIEntityResolver usage in parseXML(), something
655 * in parseXML() is resolving the html entities, including those used in carefully
656 * html-entity-escaped filenames.
657 * We need to get the filenames in the DOM correct after parsing a metadata.xml file
658 * into memory, so that we have the correct filenames and so that we'll write it out correctly.
659 * Therefore, always call this method after a successful parseXML() call on a metadata.xml.
660 * @param doc is the Document where the FILENAME_ELEMENTs need to be re-encoded.
661 * At the end of this function, the doc will be modified with the re-encoded filenames.
662 *
663 * DO NOT REMOVE THE DEBUGGING STATEMENTS IN THIS FUNCTION, AS THEY'RE USEFUL
664 * FOR DEBUGGING ENCODING ISSUES TO DO WITH FILE LEVEL META ASSIGNED TO FILENAMES
665 * THAT ARE NON-ASCII OR CONTAIN +/ampersands IN THEM.
666 */
667 static private void reEncodeFilenamesInMetadataXML(Document doc) {
668
669 String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding("."); // returns the curr dir path after removing the /./ at end
670 //System.err.println("@@@ curr_directory_path: " + curr_directory_path);
671
672 //System.err.println("PARSED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
673
674 // Read all the FileSet elements in the file
675 NodeList fileset_elements_nodelist = doc.getElementsByTagName(FILESET_ELEMENT);
676 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
677 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
678
679 // get the value of all FileName elements
680 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
681 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
682 Element filename_element = (Element) filename_elements_nodelist.item(j);
683 String filename = XMLTools.getElementTextValue(filename_element);
684 if(!filename.equals(DIRECTORY_FILENAME)) {
685 // Reencode filename after parseXML() had the side-effect of decoding entities in filename elements
686
687 //System.err.println("Filename before reencoding was: " + filename);
688
689 String encoded_filename = filename;
690 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
691 encoded_filename = encoded_filename.replace("&", FilenameEncoding.HEX_ENTITY_AMPERSAND);
692 } else {
693 // Can't convert to URI with backslash-escaped chars (backslash used in regexed filename are illegal in URI object
694 // created by filenameToURLEncoding). So replace backslashes in regex with url-encoded hex-value of backslash, %5C.
695 encoded_filename = filename.replace("\\", "%5C");
696
697 // get the URL encoded filename preserving special encodings, with any curr_directory_path prefix removed
698 encoded_filename = FilenameEncoding.filenameToURLEncodingWithPrefixRemoved(encoded_filename, curr_directory_path);
699
700 // Reintrodudce the backslash characters in place of their %5C hex placeholders
701 encoded_filename = encoded_filename.replace("%5C", "\\");
702 }
703 // Update filename element in DOM
704 XMLTools.setElementTextValue(filename_element, encoded_filename);
705 //System.err.println("Filename after reencoding was: " + encoded_filename);
706 }
707 }
708 }
709 //System.err.println("RE-ENCODED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
710 }
711
712 /**
713 * Every metadata.xml file must be skimmed when a collection is opened, for three very important reasons:
714 * - To handle any non-namespaced metadata in the metadata.xml files (this is mapped and the files rewritten)
715 * - To get a complete list of the metadata elements in the collection (used in Design and Format panes)
716 * - To build complete and accurate metadata value trees (used in the Enrich pane)
717 */
718 public void skimFile()
719 {
720 boolean file_changed = false;
721
722 // Parse the metadata.xml file
723 DebugStream.println("Skimming metadata.xml file " + this + "...");
724
725 Document document = XMLTools.parseXMLFile(this);
726 if (document == null) {
727 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
728 return;
729 }
730 // Always call this method after calling parseXMLFile
731 reEncodeFilenamesInMetadataXML(document);
732
733 // Read all the Metadata elements in the file
734 HashMap target_metadata_element_name_attrs_cache = new HashMap();
735 NodeList metadata_elements_nodelist = document.getElementsByTagName(METADATA_ELEMENT);
736 for (int i = 0; i < metadata_elements_nodelist.getLength(); i++) {
737 Element current_metadata_element = (Element) metadata_elements_nodelist.item(i);
738 String metadata_element_name_full = current_metadata_element.getAttribute("name");
739 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
740
741 // Ignore legacy crap
742 if (metadata_set_namespace.equals("hidden")) {
743 continue;
744 }
745
746 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
747 if (metadata_set == null) {
748 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
749 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
750 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
751 // Skip this element if we still don't have a loaded element for it
752 continue;
753 }
754
755 // Update the metadata.xml file to have the new (namespaced) element name
756 // Instead of using current_metadata_element.setAttribute("name", target_metadata_element_name_full)
757 // we create an Attr object for each target metadata element name, and cache them
758 // This makes a *huge* difference (namespacing a metadata.xml file with 45000 metadata entries now
759 // takes 45 seconds instead of 30 minutes!) -- why is setting the value of a Node so slow?
760 Attr target_metadata_element_name_attr = (Attr) target_metadata_element_name_attrs_cache.get(target_metadata_element_name_full);
761 if (target_metadata_element_name_attr == null) {
762 target_metadata_element_name_attr = document.createAttribute("name");
763 target_metadata_element_name_attr.setValue(target_metadata_element_name_full);
764 target_metadata_element_name_attrs_cache.put(target_metadata_element_name_full, target_metadata_element_name_attr);
765 }
766
767 // Remove the old name attribute and add the new (namespaced) one
768 current_metadata_element.removeAttribute("name");
769 current_metadata_element.setAttributeNode((Attr) target_metadata_element_name_attr.cloneNode(false));
770 file_changed = true;
771
772 metadata_element_name_full = target_metadata_element_name_full;
773 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
774 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
775 }
776
777 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
778 MetadataElement metadata_element = metadata_set.getMetadataElementWithName(metadata_element_name);
779
780 // If the element doesn't exist in the metadata set, add it
781 if (metadata_element == null) {
782 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
783 }
784
785 // Square brackets need to be escaped because they are a special character in Greenstone
786 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
787 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
788 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
789
790 metadata_element.addMetadataValue(metadata_value_string);
791 }
792
793 // Rewrite the metadata.xml file if it has changed
794 if (file_changed) {
795 XMLTools.writeXMLFile(this, document);
796 }
797 }
798
799 /**
800 * The gs.filenameEncoding metadata is unique in that, when added, removed or
801 * replaced, it must be applied on the file(name) whose metadata has been
802 * adjusted.
803 * This method handles all that, given the regular expression or filepath name
804 * to match on (.* matches subdirectories), the affected fileNode, the new
805 * encoding value and whether a new encoding value has been added/an existing
806 * one has been replaced or whether the encoding metadata has been removed.
807 * The new adjusted value for the encoding metadata is returned.
808 *
809 * MetadataXMLFileManager maintains a hashmap of (URL-encoded filepaths, encoding)
810 * to allow fast access to previously assigned gs.filenameEncoding metadata (if
811 * any) for each file. This hashmap also needs to be updated, but this update
812 * is complicated by the fact that it concerns regular expressions that could
813 * affect multiple filenames.
814 */
815 public String processFilenameEncoding(String file_path_regexp, CollectionTreeNode file_node,
816 String encoding_metadata_value, boolean removingMetadata)
817 {
818 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
819 return encoding_metadata_value;
820 }
821
822 // Work out this filenode's new encoding and apply it:
823
824 if(removingMetadata) { // encoding_metadata_value = ""
825 // gs.filenameEncoding metadata being removed, work out
826 // any inherited metadata to replace it with in the meta-table
827 encoding_metadata_value = FilenameEncoding.getInheritedFilenameEncoding(
828 file_node.getURLEncodedFilePath(), file_node.getFile());
829 // should be canonical encoding already
830 }
831 else if(!encoding_metadata_value.equals("")) {
832 // if adding or replacing filename encoding,
833 // get the canonical encoding name for this alias
834 encoding_metadata_value = FilenameEncoding.canonicalEncodingName(encoding_metadata_value);
835 }
836 // Reencode the display of this filenode only as any affected
837 // childnodes will be reencoded on FileNode.refreshDescendantEncodings()
838 file_node.reencodeDisplayName(encoding_metadata_value);
839
840
841 // Whether removing or adding/replacing the file's gs.filename encoding meta,
842 // store this in the file-to-encoding map for fast access, since the map stores
843 // empty string values when no meta has been assigned at this file level.
844 // In the case of removingMetadata, the value stored will be the fallback value
845
846 String urlpath = file_node.getURLEncodedFilePath();
847 if(removingMetadata) {
848 // remove it from the map instead of inserting "", so that when folders in the collectiontree
849 // are being deleted or shifted, the removemetada (and addmetadata) calls that get fired
850 // for each affected filenodes does not cause the undesirable effect of multiple "" to be
851 // entered into the filename-to-encoding map for filepaths that no longer exist .
852 FilenameEncoding.map.remove(urlpath);
853 } else { // for adding and replacing, put the encoding into the map (also replaces any existing encoding for it)
854 FilenameEncoding.map.put(urlpath, encoding_metadata_value);
855 }
856
857 // If new folder-level metadata (or metadata for a set of files fitting a pattern) has been
858 // assigned, the file_to_encodings map will be cleared for all descendant folders and files,
859 // so that these can be re-calculated upon refreshing the visible parts of the CollectionTree.
860 // Mark the state as requiring a refresh of the CollectionTree.
861 // This next step also serves to prevent the MetadataValueTableModel from trying to update
862 // itself while a refresh (involving re-encoding of filenames of visible nodes) is in progress.
863 FilenameEncoding.setRefreshRequired(true);
864
865 return encoding_metadata_value;
866 }
867}
Note: See TracBrowser for help on using the repository browser.