source: main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java@ 33737

Last change on this file since 33737 was 33737, checked in by ak19, 4 years ago

A larger, but still incomplete, fix for the problem of attaching and retaining file-level assigned metadata for filenames containing non-ASCII characters. 1. Committing an intermediate version of the bugfix containing the idea suggested by Kathy: reuse the steps in fileToURLEncoding(File) for a String parameter, since she felt that, as the String represents a filename, a URI object should be instantiable from a String. This worked with some massaging. I can't yet get the new fileToURLEncoding(String) to work by calling fileToURLEncoding(File), so I am committing the version of fileToURLEncoding(String) that is largely a copy of fileToURLEncoding(File) until I can get the simpler variant working. 2. The new method is called after each successful parseXML call from MetadataXMLFile, so that the entities decoded by parseXML() are re-encoded in the DOM. This allows us to retain the correct filenames originally mentioned in metadata.xml files, to do proper comparisons against them when attaching/modifying further metadata, and to ensure that the correct values get written out again into metadata.xml files. 3. Still want to get the simpler version of fileToURLEncoding(String) to work, reusing fileToURLEncoding(File). 4. Want to get ampersands and plus signs in filenames to work (+ signs in filenames are lost when filenames are converted to URLs). 5. Still need to investigate the missing ex. metadata for filenames containing non-ASCII characters.

  • Property svn:keywords set to Author Date Id Revision
File size: 40.8 KB
Line 
1/**
2 *############################################################################
3 * A component of the Greenstone Librarian Interface, part of the Greenstone
4 * digital library suite from the New Zealand Digital Library Project at the
5 * University of Waikato, New Zealand.
6 *
7 * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ
8 *
9 * Copyright (C) 2004 New Zealand Digital Library Project
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *############################################################################
25 */
26
27package org.greenstone.gatherer.metadata;
28
29
30import java.io.*;
31import java.util.*;
32import org.greenstone.gatherer.DebugStream;
33import org.greenstone.gatherer.collection.CollectionTreeNode;
34import org.greenstone.gatherer.util.XMLTools;
35import org.w3c.dom.*;
36
37import org.greenstone.gatherer.util.Utility;
38
39/** This class represents one metadata.xml file */
40public class MetadataXMLFile
41 extends File
42{
43 static final private String DESCRIPTION_ELEMENT = "Description";
44 static final private String DIRECTORY_FILENAME = ".*";
45 static final private String FILENAME_ELEMENT = "FileName";
46 static final private String FILESET_ELEMENT = "FileSet";
47 static final private String METADATA_ELEMENT = "Metadata";
48 static final private String[] nonEscapingElements = new String[]{FILENAME_ELEMENT};
49
50 /** Special metadata field: the filename encoding is a unique sort of metadata in
51 * that it is not just information stored with a collection file, but also needs to
52 * be applied in real-time to the collection file (to its filename) for display. */
53 static final public String FILENAME_ENCODING_METADATA = "gs.filenameEncoding";
54
55 // To speed things up a bit we keep the last accessed metadata.xml file in memory
56 static private File loaded_file = null;
57 static private Document loaded_file_document = null;
58 static private boolean loaded_file_changed = false;
59
60
/**
 * Creates a handle on the metadata.xml file at the given path.
 * Nothing is read or parsed here; the path is simply handed to the File constructor.
 *
 * @param metadata_xml_file_path the path of the metadata.xml file
 */
public MetadataXMLFile(String metadata_xml_file_path)
{
    super(metadata_xml_file_path);
}
65
66
67 public void addMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
68 {
69 // If this metadata.xml file isn't the one currently loaded, load it now
70 if (loaded_file != this) {
71 // First we must save out the currently loaded file
72 saveLoadedFile();
73
74 // Parse the metadata.xml file
75 Document document = XMLTools.parseXMLFile(this);
76 if (document == null) {
77 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
78 return;
79 }
80
81 loaded_file = this;
82 loaded_file_document = document;
83 reEncodeFilenamesInMetadataXML(loaded_file_document);
84
85 }
86
87 // Determine the file's path relative to the location of the metadata.xml file
88 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
89 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
90 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
91 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
92 }
93
94 // Form a regular expression that specifies the scope of the metadata
95 String file_path_regexp;
96 if (file_relative_path.equals("")) {
97 // Special case for matching all files in the directory
98 file_path_regexp = DIRECTORY_FILENAME;
99 }
100 else {
101 // Convert the file path into a regular expression that will match it
102 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
103 }
104
105 //System.err.println("MetadataXMLFile.addMetadata() Adding meta for file regexp: "
106 // + file_path_regexp + " - " + org.greenstone.gatherer.util.Utility.debugUnicodeString(file_path_regexp));
107
108 // Find the appropriate FileSet element for this file
109 Element appropriate_fileset_element = null;
110
111 // Read all the FileSet elements in the file
112 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
113 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
114 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
115
116 // Check the FileName elements of the FileSet to see if we have a match
117 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
118 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
119 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
120 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
121
122 // Only exact matches can be extended with new metadata
123 if (current_filename_element_value.equals(file_path_regexp)) {
124 appropriate_fileset_element = current_fileset_element;
125 break;
126 }
127 }
128 }
129
130 // If no appropriate FileSet element exists create a new one for this file
131 if (appropriate_fileset_element == null) {
132 DebugStream.println("Creating new FileSet element for file since none exists..."+file_path_regexp);
133 appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT);
134
135 Element new_filename_element = loaded_file_document.createElement(FILENAME_ELEMENT);
136 new_filename_element.appendChild(loaded_file_document.createTextNode(file_path_regexp));
137 appropriate_fileset_element.appendChild(new_filename_element);
138
139 Element new_description_element = loaded_file_document.createElement(DESCRIPTION_ELEMENT);
140 appropriate_fileset_element.appendChild(new_description_element);
141
142 // add the fileset element for .* at the top: especially important for
143 // non-accumulating (and override mode) meta. Other type fileset elements can be appended
144 if(file_path_regexp.equals(DIRECTORY_FILENAME)) {
145 loaded_file_document.getDocumentElement().insertBefore(appropriate_fileset_element,
146 loaded_file_document.getDocumentElement().getFirstChild());
147 } else {
148 loaded_file_document.getDocumentElement().appendChild(appropriate_fileset_element);
149 }
150 }
151
152 // Add each of the metadata values to the FileSet's Description element
153 Element description_element = (Element) appropriate_fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0);
154 for (int i = 0; i < metadata_values.size(); i++) {
155 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
156 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
157
158 // Remove any characters that are invalid in XML
159 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
160
161 // Square brackets need to be escaped because they are a special character in Greenstone
162 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
163 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
164
165 // the gs.filenameEncoding metadata is unique in that, when added, removed or
166 // changed, it must be applied on the file(name) whose metadata has been adjusted
167 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
168 metadata_value_string = processFilenameEncoding(file_path_regexp,
169 file_node, metadata_value_string, false);
170 // true only if removing meta
171 }
172
173 // Check if this piece of metadata has already been assigned to this FileSet element
174 boolean metadata_already_assigned = false;
175 NodeList metadata_elements_nodelist = description_element.getElementsByTagName(METADATA_ELEMENT);
176 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
177 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
178
179 // Check if the metadata element name matches
180 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
181 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
182 // if the metadata must not accumulate, then edit the current value
183 if (!metadata_value.isAccumulatingMetadata()) {
184 XMLTools.setNodeText(current_metadata_element, metadata_value_string);
185 metadata_already_assigned = true;
186 break;
187 }
188 // Check if the metadata element value matches
189 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
190 if (current_metadata_value_string.equals(metadata_value_string)) {
191 // Metadata already assigned
192 metadata_already_assigned = true;
193 break;
194 }
195 }
196 }
197
198 // If the piece of metadata hasn't already been assigned, add it now
199 if (!metadata_already_assigned) {
200 // Create a new Metadata element to record this metadata
201 Element new_metadata_element = loaded_file_document.createElement(METADATA_ELEMENT);
202 new_metadata_element.setAttribute("name", metadata_value.getMetadataElement().getFullName());
203 new_metadata_element.setAttribute("mode", (metadata_value.isAccumulatingMetadata() ? "accumulate" : "override"));
204 new_metadata_element.appendChild(loaded_file_document.createTextNode(metadata_value_string));
205
206 // Accumulating metadata: add at the end
207 if (metadata_value.isAccumulatingMetadata()) {
208 description_element.appendChild(new_metadata_element);
209 }
210 // Override metadata: add at the start (so it overrides inherited metadata without affecting other assigned metadata)
211 else {
212 description_element.insertBefore(new_metadata_element, description_element.getFirstChild());
213 }
214 }
215 }
216
217 // Remember that we've changed the file so it gets saved when a new one is loaded
218 loaded_file_changed = true;
219 }
220
221
222 public ArrayList getMetadataAssignedToFile(File file, boolean fileEncodingOnly)
223 {
224 // If this metadata.xml file isn't the one currently loaded, load it now
225 if (loaded_file != this) {
226 // First we must save out the currently loaded file
227 saveLoadedFile();
228
229 // Parse the metadata.xml file
230 Document document = XMLTools.parseXMLFile(this);
231 if (document == null) {
232 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
233 return new ArrayList();
234 }
235
236 loaded_file = this;
237 loaded_file_document = document;
238
239 reEncodeFilenamesInMetadataXML(loaded_file_document);
240 }
241
242 // Determine the file's path relative to the location of the metadata.xml file
243 String file_relative_path = FilenameEncoding.fileToURLEncoding(file);
244 File metadata_xml_file_directory = getParentFile();
245 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(metadata_xml_file_directory);
246 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
247
248 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
249 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
250 }
251
252 // Build up a list of metadata assigned to this file
253 ArrayList metadata_values = new ArrayList();
254
255 // Read all the FileSet elements in the file
256 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
257 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
258 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
259 boolean current_fileset_matches = false;
260 boolean is_one_file_only_metadata = true;
261 File folder_metadata_inherited_from = null;
262
263 // Check the FileName elements of the FileSet to see if we have a match
264 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
265 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
266 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
267 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
268
269 //System.err.println("\n Original TAIL filename was: " + Utility.debugUnicodeString(file.getName()));
270 String hexdecoded_regexed_file_relative_path = FilenameEncoding.decodeStringContainingHexEntities(file_relative_path);
271 //System.err.println("Looking in meta.xml for hexdecoded_regexed_file_RELATIVE_path: " + hexdecoded_regexed_file_relative_path
272 //+ " - debug version: " + Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path));
273
274 // Does this fileset specify metadata for one file only?
275 is_one_file_only_metadata = true;
276 if (current_filename_element_value.indexOf("*") != -1 && !current_filename_element_value.equals(DIRECTORY_FILENAME)) {
277 // No, it specifies metadata for multiple files (but not all the files in the directory)
278 is_one_file_only_metadata = false;
279 }
280
281 String hexdecoded_current_filename_element_value = FilenameEncoding.decodeStringContainingHexEntities(current_filename_element_value);
282 //System.err.println(" Checking to see if it matches " + hexdecoded_current_filename_element_value + " - debug: " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
283 //System.err.println(" Checking to see if it matches " + current_filename_element_value + " - debug: " + Utility.debugUnicodeString(current_filename_element_value));
284
285 // This fileset specifies metadata for the file
286 // MetadataXMLFile.addMetadata(CollectionTreeNode, ArrayList) stored filename in uppercase hex
287 // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples
288 if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) {
289 //System.err.println(" @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path);
290 current_fileset_matches = true;
291 if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) {
292 folder_metadata_inherited_from = metadata_xml_file_directory;
293 }
294 break;
295 } //else {
296 //System.err.println( hexdecoded_regexed_file_relative_path + " does not match " + hexdecoded_current_filename_element_value);
297 //System.err.println( Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path) + " does not match " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
298 //}
299
300 // This fileset specifies metadata for the folder the file is in
301 if (hexdecoded_regexed_file_relative_path.startsWith(hexdecoded_current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) {
302 current_fileset_matches = true;
303 folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value);
304 break;
305 }
306 }
307
308 // The FileSet doesn't apply, so move onto the next one
309 if (current_fileset_matches == false) {
310 continue;
311 }
312
313 // Read all the Metadata elements in the fileset
314 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
315 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
316 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
317 String metadata_element_name_full = current_metadata_element.getAttribute("name");
318 // if we're only looking for fileEncoding metadata and this isn't it, skip to the next
319 if(fileEncodingOnly && !metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
320 continue;
321 }
322 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
323
324 // Ignore legacy crap
325 if (metadata_set_namespace.equals("hidden")) {
326 continue;
327 }
328
329 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
330 if (metadata_set == null) {
331 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
332 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
333 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
334 // Skip this element if we still don't have a loaded element for it
335 continue;
336 }
337
338 metadata_element_name_full = target_metadata_element_name_full;
339 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
340 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
341 }
342
343 MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(metadata_element_name_full);
344
345 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
346 // If the element doesn't exist in the metadata set, we're not interested
347 //Shaoqun modified. It needs to be added to metadata_set because the user might disable skim file
348 if (metadata_element == null) {
349 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
350 // continue;
351 }
352
353 // Square brackets need to be escaped because they are a special character in Greenstone
354 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
355 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
356 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
357
358 MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
359
360 // If there is no metadata value tree node for this value, create it
361 if (metadata_value_tree_node == null) {
362 DebugStream.println("Note: No value tree node for metadata value \"" + metadata_value_string + "\"");
363 metadata_element.addMetadataValue(metadata_value_string);
364 metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
365 }
366
367 MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node);
368 metadata_value.inheritsMetadataFromFolder(folder_metadata_inherited_from);
369 metadata_value.setIsOneFileOnlyMetadata(is_one_file_only_metadata);
370
371 // Is this accumulating metadata?
372 if (current_metadata_element.getAttribute("mode").equals("accumulate")) {
373 metadata_value.setIsAccumulatingMetadata(true);
374 }
375
376 // Add the new metadata value to the list
377 metadata_values.add(metadata_value);
378 }
379 }
380
381 return metadata_values;
382 }
383
384
385 public void removeMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
386 {
387 // If this metadata.xml file isn't the one currently loaded, load it now
388 if (loaded_file != this) {
389 // First we must save out the currently loaded file
390 saveLoadedFile();
391
392 // Parse the metadata.xml file
393 Document document = XMLTools.parseXMLFile(this);
394 if (document == null) {
395 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
396 return;
397 }
398
399 loaded_file = this;
400 loaded_file_document = document;
401
402 reEncodeFilenamesInMetadataXML(loaded_file_document);
403 }
404
405 // Determine the file's path relative to the location of the metadata.xml file
406 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
407 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
408 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
409 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
410 }
411
412 // Form a regular expression that specifies the scope of the metadata
413 String file_path_regexp;
414 if (file_relative_path.equals("")) {
415 // Special case for matching all files in the directory
416 file_path_regexp = DIRECTORY_FILENAME;
417 }
418 else {
419 // Convert the file path into a regular expression that will match it
420 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
421 }
422
423 // Find the appropriate FileSet element for this file
424 Element appropriate_fileset_element = null;
425
426 // Read all the FileSet elements in the file
427 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
428 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
429 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
430
431 // Check the FileName elements of the FileSet to see if we have a match
432 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
433 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
434 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
435 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
436
437 // Only exact matches can be extended with new metadata
438 if (current_filename_element_value.equals(file_path_regexp)) {
439 appropriate_fileset_element = current_fileset_element;
440 break;
441 }
442 }
443 }
444
445 // If no appropriate FileSet element exists the metadata isn't assigned in this metadata.xml file
446 if (appropriate_fileset_element == null) {
447 DebugStream.println("Note: No appropriate FileSet element found when removing metadata from " + this);
448 return;
449 }
450
451 // Remove each of the metadata values from the FileSet's Description element
452 for (int i = 0; i < metadata_values.size(); i++) {
453 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
454
455 // Remove any characters that are invalid in XML
456 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
457
458 // Square brackets need to be escaped because they are a special character in Greenstone
459 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
460 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
461
462 // Find the Metadata element to delete from the fileset
463 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
464 NodeList metadata_elements_nodelist = appropriate_fileset_element.getElementsByTagName(METADATA_ELEMENT);
465 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
466 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
467
468 // Check the metadata element name matches
469 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
470 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
471 // Check the metadata element value matches
472 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
473 if (current_metadata_value_string.equals(metadata_value_string)) {
474
475 // Remove this Metadata element
476 current_metadata_element.getParentNode().removeChild(current_metadata_element);
477
478 // the gs.filenameEncoding metadata is unique in that, when added, removed or
479 // changed, it must be applied on the file(name) whose metadata has been adjusted
480 if(current_metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
481
482 // metadata_value_string will hereafter be the inherited gs.FilenameEncoding
483 // metadata (if any), now that the value at this level has been removed
484 metadata_value_string = processFilenameEncoding(file_path_regexp,
485 file_node, "", true); // true only if *removing* this meta
486 }
487
488 // If there are no Metadata elements left now, remove the (empty) FileSet element
489 if (metadata_elements_nodelist.getLength() == 0) {
490 appropriate_fileset_element.getParentNode().removeChild(appropriate_fileset_element);
491 }
492
493 break;
494 }
495 }
496 }
497 }
498
499 // Remember that we've changed the file so it gets saved when a new one is loaded
500 loaded_file_changed = true;
501 }
502
503
504 public void replaceMetadata(CollectionTreeNode file_node, MetadataValue old_metadata_value, MetadataValue new_metadata_value)
505 {
506 // If this metadata.xml file isn't the one currently loaded, load it now
507 if (loaded_file != this) {
508 // First we must save out the currently loaded file
509 saveLoadedFile();
510
511 // Parse the metadata.xml file
512 Document document = XMLTools.parseXMLFile(this);
513 if (document == null) {
514 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
515 return;
516 }
517
518 loaded_file = this;
519 loaded_file_document = document;
520
521 reEncodeFilenamesInMetadataXML(loaded_file_document);
522 }
523
524 // Determine the file's path relative to the location of the metadata.xml file
525 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
526 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
527 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
528 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
529 }
530
531 // Form a regular expression that specifies the scope of the metadata
532 String file_path_regexp;
533 if (file_relative_path.equals("")) {
534 // Special case for matching all files in the directory
535 file_path_regexp = DIRECTORY_FILENAME;
536 }
537 else {
538 // Convert the file path into a regular expression that will match it
539 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
540 }
541
542 // Remove any characters that are invalid in XML
543 String old_metadata_value_string = XMLTools.removeInvalidCharacters(old_metadata_value.getFullValue());
544 String new_metadata_value_string = XMLTools.removeInvalidCharacters(new_metadata_value.getFullValue());
545
546 // Square brackets need to be escaped because they are a special character in Greenstone
547 old_metadata_value_string = old_metadata_value_string.replaceAll("\\[", "&#091;");
548 old_metadata_value_string = old_metadata_value_string.replaceAll("\\]", "&#093;");
549 new_metadata_value_string = new_metadata_value_string.replaceAll("\\[", "&#091;");
550 new_metadata_value_string = new_metadata_value_string.replaceAll("\\]", "&#093;");
551
552 // Read all the FileSet elements in the file
553 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
554 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
555 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
556 boolean current_fileset_matches = false;
557
558 // Check the FileName elements of the FileSet to see if we have a match
559 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
560 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
561 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
562 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
563
564 // Only exact matches can be edited
565 if (current_filename_element_value.equals(file_path_regexp)) {
566 current_fileset_matches = true;
567 break;
568 }
569 }
570
571 // The FileSet doesn't apply, so move onto the next one
572 if (current_fileset_matches == false) {
573 continue;
574 }
575
576 // Each metadata value is only allowed to be assigned once
577 boolean new_metadata_value_already_exists = false;
578 Element metadata_element_to_edit = null;
579
580 // Find the Metadata element to replace in the fileset
581 String metadata_element_name_full = old_metadata_value.getMetadataElement().getFullName();
582 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
583 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
584 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
585
586 // Check the metadata element name matches
587 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
588 if (!current_metadata_element_name_full.equals(metadata_element_name_full)) {
589 continue;
590 }
591
592 // Check the new metadata value doesn't already exist
593 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
594 if (current_metadata_value_string.equals(new_metadata_value_string)) {
595 new_metadata_value_already_exists = true;
596 }
597
598 // Check the metadata element value matches
599 if (current_metadata_value_string.equals(old_metadata_value_string)) {
600 metadata_element_to_edit = current_metadata_element;
601 }
602 }
603
604 // If the new metadata value already existed, remove the original value
605 if (new_metadata_value_already_exists) {
606 if(metadata_element_to_edit != null) { //?????????
607 metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit);
608 } else {
609 System.err.println("ERROR MetadataXMLFile: metadata_element_to_edit is null");
610 }
611 }
612 // Otherwise replace the old value with the new value
613 // Ensure metadata_element_to_edit isn't null (may occur when multiple files are selected)
614 else if (metadata_element_to_edit != null) {
615
616 // the gs.filenameEncoding metadata is unique in that, when added, removed or
617 // changed, it must be applied on the file(name) whose metadata has been adjusted
618 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
619 new_metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, new_metadata_value_string, false);
620 // true only if removing meta
621 }
622 XMLTools.setElementTextValue(metadata_element_to_edit, new_metadata_value_string);
623 }
624 }
625
626 // Remember that we've changed the file so it gets saved when a new one is loaded
627 loaded_file_changed = true;
628 }
629
630
631 static public void saveLoadedFile()
632 {
633 // If we have a file loaded into memory and it has been modified, save it now
634 if (loaded_file != null && loaded_file_changed == true) {
635 //System.err.println("START saveLoadedFile(), loaded_file_document:\n" + XMLTools.elementToString(loaded_file_document.getDocumentElement(), true));
636
637 XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements);
638
639 /* // DEBUGGING:
640 Document doc = XMLTools.parseXMLFile(loaded_file);
641 System.err.println("AT END saveLoadedFile(), PARSED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
642
643 reEncodeFilenamesInMetadataXML(doc);
644 System.err.println("AT END saveLoadedFile(), RE-ENCODED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
645 */
646 loaded_file_changed = false;
647 }
648
649 //System.err.println("@@@@ END of saveLoadedFile()");
650 //Utility.printCaller();
651 }
652
653 /**
654 * parseXML(metadata.xml) has the side-effect of resolving html entities.
655 * Although this is not done by the GLIEntityResolver usage in parseXML(), something
656 * in parseXML() is resolving the html entities, including those used in carefully
657 * html-entity-escaped filenames.
658 * We need to get the filenames in the DOM correct after parsing a metadata.xml file
659 * into memory, so that we have the correct filenames and so that we'll write it out correctly.
660 * Therefore, always call this method after a successful parseXML() call on a metadata.xml.
661 * @param doc is the Document where the FILENAME_ELEMENTs need to be re-encoded.
662 * At the end of this function, the doc will be modified with the re-encoded filenames.
663 *
664 */
665 static private void reEncodeFilenamesInMetadataXML(Document doc) {
666 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
667 return;
668 }
669
670 //System.err.println("PARSED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
671
672 // Read all the FileSet elements in the file
673 NodeList fileset_elements_nodelist = doc.getElementsByTagName(FILESET_ELEMENT);
674 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
675 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
676
677 // get the value of all FileName elements
678 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
679 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
680 Element filename_element = (Element) filename_elements_nodelist.item(j);
681 String filename = XMLTools.getElementTextValue(filename_element);
682 if(!filename.equals(DIRECTORY_FILENAME)) {
683 //System.err.println("Filename before reencoding was: " + filename);
684 // reencode filename
685 // can't convert to URI with backslash-escaped chars (backslash used in regexed filename are illegal in URI object
686 // created by filenameToURLEncoding).
687 String encoded_filename = filename.replace("\\", "%5C");
688 encoded_filename = FilenameEncoding.filenameToURLEncoding(encoded_filename);
689 // escape chars for regex again
690 encoded_filename = encoded_filename.replace("%5C", "\\");
691 XMLTools.setElementTextValue(filename_element, encoded_filename);
692 //System.err.println("Filename after reencoding was: " + encoded_filename);
693 }
694 }
695 }
696 //System.err.println("RE-ENCODED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
697 }
698
699 /**
700 * Every metadata.xml file must be skimmed when a collection is opened, for three very important reasons:
701 * - To handle any non-namespaced metadata in the metadata.xml files (this is mapped and the files rewritten)
702 * - To get a complete list of the metadata elements in the collection (used in Design and Format panes)
703 * - To build complete and accurate metadata value trees (used in the Enrich pane)
704 */
705 public void skimFile()
706 {
707 boolean file_changed = false;
708
709 // Parse the metadata.xml file
710 DebugStream.println("Skimming metadata.xml file " + this + "...");
711
712 Document document = XMLTools.parseXMLFile(this);
713 if (document == null) {
714 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
715 return;
716 }
717 // Always call this method after calling parseXMLFile
718 reEncodeFilenamesInMetadataXML(document);
719
720 // Read all the Metadata elements in the file
721 HashMap target_metadata_element_name_attrs_cache = new HashMap();
722 NodeList metadata_elements_nodelist = document.getElementsByTagName(METADATA_ELEMENT);
723 for (int i = 0; i < metadata_elements_nodelist.getLength(); i++) {
724 Element current_metadata_element = (Element) metadata_elements_nodelist.item(i);
725 String metadata_element_name_full = current_metadata_element.getAttribute("name");
726 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
727
728 // Ignore legacy crap
729 if (metadata_set_namespace.equals("hidden")) {
730 continue;
731 }
732
733 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
734 if (metadata_set == null) {
735 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
736 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
737 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
738 // Skip this element if we still don't have a loaded element for it
739 continue;
740 }
741
742 // Update the metadata.xml file to have the new (namespaced) element name
743 // Instead of using current_metadata_element.setAttribute("name", target_metadata_element_name_full)
744 // we create an Attr object for each target metadata element name, and cache them
745 // This makes a *huge* difference (namespacing a metadata.xml file with 45000 metadata entries now
746 // takes 45 seconds instead of 30 minutes!) -- why is setting the value of a Node so slow?
747 Attr target_metadata_element_name_attr = (Attr) target_metadata_element_name_attrs_cache.get(target_metadata_element_name_full);
748 if (target_metadata_element_name_attr == null) {
749 target_metadata_element_name_attr = document.createAttribute("name");
750 target_metadata_element_name_attr.setValue(target_metadata_element_name_full);
751 target_metadata_element_name_attrs_cache.put(target_metadata_element_name_full, target_metadata_element_name_attr);
752 }
753
754 // Remove the old name attribute and add the new (namespaced) one
755 current_metadata_element.removeAttribute("name");
756 current_metadata_element.setAttributeNode((Attr) target_metadata_element_name_attr.cloneNode(false));
757 file_changed = true;
758
759 metadata_element_name_full = target_metadata_element_name_full;
760 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
761 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
762 }
763
764 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
765 MetadataElement metadata_element = metadata_set.getMetadataElementWithName(metadata_element_name);
766
767 // If the element doesn't exist in the metadata set, add it
768 if (metadata_element == null) {
769 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
770 }
771
772 // Square brackets need to be escaped because they are a special character in Greenstone
773 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
774 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
775 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
776
777 metadata_element.addMetadataValue(metadata_value_string);
778 }
779
780 // Rewrite the metadata.xml file if it has changed
781 if (file_changed) {
782 XMLTools.writeXMLFile(this, document);
783 }
784 }
785
786 /**
787 * The gs.filenameEncoding metadata is unique in that, when added, removed or
788 * replaced, it must be applied on the file(name) whose metadata has been
789 * adjusted.
790 * This method handles all that, given the regular expression or filepath name
791 * to match on (.* matches subdirectories), the affected fileNode, the new
792 * encoding value and whether a new encoding value has been added/an existing
793 * one has been replaced or whether the encoding metadata has been removed.
794 * The new adjusted value for the encoding metadata is returned.
795 *
796 * MetadataXMLFileManager maintains a hashmap of (URL-encoded filepaths, encoding)
797 * to allow fast access to previously assigned gs.filenameEncoding metadata (if
798 * any) for each file. This hashmap also needs to be updated, but this update
799 * is complicated by the fact that it concerns regular expressions that could
800 * affect multiple filenames.
801 */
802 public String processFilenameEncoding(String file_path_regexp, CollectionTreeNode file_node,
803 String encoding_metadata_value, boolean removingMetadata)
804 {
805 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
806 return encoding_metadata_value;
807 }
808
809 // Work out this filenode's new encoding and apply it:
810
811 if(removingMetadata) { // encoding_metadata_value = ""
812 // gs.filenameEncoding metadata being removed, work out
813 // any inherited metadata to replace it with in the meta-table
814 encoding_metadata_value = FilenameEncoding.getInheritedFilenameEncoding(
815 file_node.getURLEncodedFilePath(), file_node.getFile());
816 // should be canonical encoding already
817 }
818 else if(!encoding_metadata_value.equals("")) {
819 // if adding or replacing filename encoding,
820 // get the canonical encoding name for this alias
821 encoding_metadata_value = FilenameEncoding.canonicalEncodingName(encoding_metadata_value);
822 }
823 // Reencode the display of this filenode only as any affected
824 // childnodes will be reencoded on FileNode.refreshDescendantEncodings()
825 file_node.reencodeDisplayName(encoding_metadata_value);
826
827
828 // Whether removing or adding/replacing the file's gs.filename encoding meta,
829 // store this in the file-to-encoding map for fast access, since the map stores
830 // empty string values when no meta has been assigned at this file level.
831 // In the case of removingMetadata, the value stored will be the fallback value
832
833 String urlpath = file_node.getURLEncodedFilePath();
834 if(removingMetadata) {
835 // remove it from the map instead of inserting "", so that when folders in the collectiontree
836 // are being deleted or shifted, the removemetada (and addmetadata) calls that get fired
837 // for each affected filenodes does not cause the undesirable effect of multiple "" to be
838 // entered into the filename-to-encoding map for filepaths that no longer exist .
839 FilenameEncoding.map.remove(urlpath);
840 } else { // for adding and replacing, put the encoding into the map (also replaces any existing encoding for it)
841 FilenameEncoding.map.put(urlpath, encoding_metadata_value);
842 }
843
844 // If new folder-level metadata (or metadata for a set of files fitting a pattern) has been
845 // assigned, the file_to_encodings map will be cleared for all descendant folders and files,
846 // so that these can be re-calculated upon refreshing the visible parts of the CollectionTree.
847 // Mark the state as requiring a refresh of the CollectionTree.
848 // This next step also serves to prevent the MetadataValueTableModel from trying to update
849 // itself while a refresh (involving re-encoding of filenames of visible nodes) is in progress.
850 FilenameEncoding.setRefreshRequired(true);
851
852 return encoding_metadata_value;
853 }
854}
Note: See TracBrowser for help on using the repository browser.