source: main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java@ 33747

Last change on this file since 33747 was 33747, checked in by ak19, 4 years ago

Tidying up code some more and moving unused (but reusable and possibly useful) FilenameEncoding.java functions to end of file.

  • Property svn:keywords set to Author Date Id Revision
File size: 41.0 KB
Line 
1/**
2 *############################################################################
3 * A component of the Greenstone Librarian Interface, part of the Greenstone
4 * digital library suite from the New Zealand Digital Library Project at the
5 * University of Waikato, New Zealand.
6 *
7 * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ
8 *
9 * Copyright (C) 2004 New Zealand Digital Library Project
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *############################################################################
25 */
26
27package org.greenstone.gatherer.metadata;
28
29
30import java.io.*;
31import java.util.*;
32import org.greenstone.gatherer.DebugStream;
33import org.greenstone.gatherer.collection.CollectionTreeNode;
34import org.greenstone.gatherer.util.XMLTools;
35import org.w3c.dom.*;
36
37import org.greenstone.gatherer.util.Utility;
38
39/** This class represents one metadata.xml file */
40public class MetadataXMLFile
41 extends File
42{
43 static final private String DESCRIPTION_ELEMENT = "Description";
44 static final private String DIRECTORY_FILENAME = ".*";
45 static final private String FILENAME_ELEMENT = "FileName";
46 static final private String FILESET_ELEMENT = "FileSet";
47 static final private String METADATA_ELEMENT = "Metadata";
48 static final private String[] nonEscapingElements = new String[]{FILENAME_ELEMENT};
49
50 /** Special metadata field: the filename encoding is a unique sort of metadata in
51 * that it is not just information stored with a collection file, but also needs to
52 * be applied in real-time to the collection file (to its filename) for display. */
53 static final public String FILENAME_ENCODING_METADATA = "gs.filenameEncoding";
54
55 // To speed things up a bit we keep the last accessed metadata.xml file in memory
56 static private File loaded_file = null;
57 static private Document loaded_file_document = null;
58 static private boolean loaded_file_changed = false;
59
60
/**
 * Creates a handle on the metadata.xml file at the given path.
 * The constructor only passes the path on to File; the file is parsed later,
 * by the methods that read or edit its metadata.
 *
 * @param metadata_xml_file_path the path of the metadata.xml file
 */
public MetadataXMLFile(String metadata_xml_file_path)
{
    super(metadata_xml_file_path);
}
65
66
67 public void addMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
68 {
69 // If this metadata.xml file isn't the one currently loaded, load it now
70 if (loaded_file != this) {
71 // First we must save out the currently loaded file
72 saveLoadedFile();
73
74 // Parse the metadata.xml file
75 Document document = XMLTools.parseXMLFile(this);
76 if (document == null) {
77 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
78 return;
79 }
80
81 loaded_file = this;
82 loaded_file_document = document;
83 reEncodeFilenamesInMetadataXML(loaded_file_document);
84
85 }
86
87 // Determine the file's path relative to the location of the metadata.xml file
88 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
89 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
90 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
91 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
92 }
93
94 // Form a regular expression that specifies the scope of the metadata
95 String file_path_regexp;
96 if (file_relative_path.equals("")) {
97 // Special case for matching all files in the directory
98 file_path_regexp = DIRECTORY_FILENAME;
99 }
100 else {
101 // Convert the file path into a regular expression that will match it
102 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
103 }
104
105 //System.err.println("MetadataXMLFile.addMetadata() Adding meta for file regexp: "
106 // + file_path_regexp + " - " + org.greenstone.gatherer.util.Utility.debugUnicodeString(file_path_regexp));
107
108 // Find the appropriate FileSet element for this file
109 Element appropriate_fileset_element = null;
110
111 // Read all the FileSet elements in the file
112 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
113 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
114 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
115
116 // Check the FileName elements of the FileSet to see if we have a match
117 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
118 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
119 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
120 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
121
122 // Only exact matches can be extended with new metadata
123 if (current_filename_element_value.equals(file_path_regexp)) {
124 appropriate_fileset_element = current_fileset_element;
125 break;
126 }
127 }
128 }
129
130 // If no appropriate FileSet element exists create a new one for this file
131 if (appropriate_fileset_element == null) {
132 DebugStream.println("Creating new FileSet element for file since none exists..."+file_path_regexp);
133 appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT);
134
135 Element new_filename_element = loaded_file_document.createElement(FILENAME_ELEMENT);
136 new_filename_element.appendChild(loaded_file_document.createTextNode(file_path_regexp));
137 appropriate_fileset_element.appendChild(new_filename_element);
138
139 Element new_description_element = loaded_file_document.createElement(DESCRIPTION_ELEMENT);
140 appropriate_fileset_element.appendChild(new_description_element);
141
142 // add the fileset element for .* at the top: especially important for
143 // non-accumulating (and override mode) meta. Other type fileset elements can be appended
144 if(file_path_regexp.equals(DIRECTORY_FILENAME)) {
145 loaded_file_document.getDocumentElement().insertBefore(appropriate_fileset_element,
146 loaded_file_document.getDocumentElement().getFirstChild());
147 } else {
148 loaded_file_document.getDocumentElement().appendChild(appropriate_fileset_element);
149 }
150 }
151
152 // Add each of the metadata values to the FileSet's Description element
153 Element description_element = (Element) appropriate_fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0);
154 for (int i = 0; i < metadata_values.size(); i++) {
155 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
156 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
157
158 // Remove any characters that are invalid in XML
159 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
160
161 // Square brackets need to be escaped because they are a special character in Greenstone
162 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
163 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
164
165 // the gs.filenameEncoding metadata is unique in that, when added, removed or
166 // changed, it must be applied on the file(name) whose metadata has been adjusted
167 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
168 metadata_value_string = processFilenameEncoding(file_path_regexp,
169 file_node, metadata_value_string, false);
170 // true only if removing meta
171 }
172
173 // Check if this piece of metadata has already been assigned to this FileSet element
174 boolean metadata_already_assigned = false;
175 NodeList metadata_elements_nodelist = description_element.getElementsByTagName(METADATA_ELEMENT);
176 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
177 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
178
179 // Check if the metadata element name matches
180 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
181 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
182 // if the metadata must not accumulate, then edit the current value
183 if (!metadata_value.isAccumulatingMetadata()) {
184 XMLTools.setNodeText(current_metadata_element, metadata_value_string);
185 metadata_already_assigned = true;
186 break;
187 }
188 // Check if the metadata element value matches
189 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
190 if (current_metadata_value_string.equals(metadata_value_string)) {
191 // Metadata already assigned
192 metadata_already_assigned = true;
193 break;
194 }
195 }
196 }
197
198 // If the piece of metadata hasn't already been assigned, add it now
199 if (!metadata_already_assigned) {
200 // Create a new Metadata element to record this metadata
201 Element new_metadata_element = loaded_file_document.createElement(METADATA_ELEMENT);
202 new_metadata_element.setAttribute("name", metadata_value.getMetadataElement().getFullName());
203 new_metadata_element.setAttribute("mode", (metadata_value.isAccumulatingMetadata() ? "accumulate" : "override"));
204 new_metadata_element.appendChild(loaded_file_document.createTextNode(metadata_value_string));
205
206 // Accumulating metadata: add at the end
207 if (metadata_value.isAccumulatingMetadata()) {
208 description_element.appendChild(new_metadata_element);
209 }
210 // Override metadata: add at the start (so it overrides inherited metadata without affecting other assigned metadata)
211 else {
212 description_element.insertBefore(new_metadata_element, description_element.getFirstChild());
213 }
214 }
215 }
216
217 // Remember that we've changed the file so it gets saved when a new one is loaded
218 loaded_file_changed = true;
219 }
220
221
222 public ArrayList getMetadataAssignedToFile(File file, boolean fileEncodingOnly)
223 {
224 // If this metadata.xml file isn't the one currently loaded, load it now
225 if (loaded_file != this) {
226 // First we must save out the currently loaded file
227 saveLoadedFile();
228
229 // Parse the metadata.xml file
230 Document document = XMLTools.parseXMLFile(this);
231 if (document == null) {
232 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
233 return new ArrayList();
234 }
235
236 loaded_file = this;
237 loaded_file_document = document;
238
239 reEncodeFilenamesInMetadataXML(loaded_file_document);
240 }
241
242 // Determine the file's path relative to the location of the metadata.xml file
243 String file_relative_path = FilenameEncoding.fileToURLEncoding(file);
244 File metadata_xml_file_directory = getParentFile();
245 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(metadata_xml_file_directory);
246 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
247
248 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
249 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
250 }
251
252 // Build up a list of metadata assigned to this file
253 ArrayList metadata_values = new ArrayList();
254
255 // Read all the FileSet elements in the file
256 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
257 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
258 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
259 boolean current_fileset_matches = false;
260 boolean is_one_file_only_metadata = true;
261 File folder_metadata_inherited_from = null;
262
263 // Check the FileName elements of the FileSet to see if we have a match
264 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
265 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
266 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
267 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
268
269 //System.err.println("\n Original TAIL filename was: " + Utility.debugUnicodeString(file.getName()));
270 String hexdecoded_regexed_file_relative_path = FilenameEncoding.decodeStringContainingHexEntities(file_relative_path);
271 //System.err.println("Looking in meta.xml for hexdecoded_regexed_file_RELATIVE_path: " + hexdecoded_regexed_file_relative_path
272 //+ " - debug version: " + Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path));
273
274 // Does this fileset specify metadata for one file only?
275 is_one_file_only_metadata = true;
276 if (current_filename_element_value.indexOf("*") != -1 && !current_filename_element_value.equals(DIRECTORY_FILENAME)) {
277 // No, it specifies metadata for multiple files (but not all the files in the directory)
278 is_one_file_only_metadata = false;
279 }
280
281 String hexdecoded_current_filename_element_value = FilenameEncoding.decodeStringContainingHexEntities(current_filename_element_value);
282 //System.err.println(" Checking to see if it matches " + hexdecoded_current_filename_element_value + " - debug: " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
283 //System.err.println(" Checking to see if it matches " + current_filename_element_value + " - debug: " + Utility.debugUnicodeString(current_filename_element_value));
284
285 // This fileset specifies metadata for the file
286 // MetadataXMLFile.addMetadata(CollectionTreeNode, ArrayList) stored filename in uppercase hex
287 // so need to make sure everything hex has been decoded (no more hex) to compare apples with apples
288 if (hexdecoded_regexed_file_relative_path.matches(hexdecoded_current_filename_element_value)) { //if (file_relative_path.matches(current_filename_element_value)) {
289 //System.err.println(" @@@ Found a match in meta.xml for hexdecoded_regexed_file_relative_path: " + hexdecoded_regexed_file_relative_path + "\n");
290 current_fileset_matches = true;
291 if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) {
292 folder_metadata_inherited_from = metadata_xml_file_directory;
293 }
294 break;
295 } //else {
296 //System.err.println( hexdecoded_regexed_file_relative_path + " does not match " + hexdecoded_current_filename_element_value);
297 //System.err.println( Utility.debugUnicodeString(hexdecoded_regexed_file_relative_path) + " does not match " + Utility.debugUnicodeString(hexdecoded_current_filename_element_value));
298 //}
299
300 // This fileset specifies metadata for the folder the file is in
301 if (hexdecoded_regexed_file_relative_path.startsWith(hexdecoded_current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) {
302 current_fileset_matches = true;
303 folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value);
304 break;
305 }
306 }
307
308 // The FileSet doesn't apply, so move onto the next one
309 if (current_fileset_matches == false) {
310 continue;
311 }
312
313 // Read all the Metadata elements in the fileset
314 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
315 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
316 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
317 String metadata_element_name_full = current_metadata_element.getAttribute("name");
318 // if we're only looking for fileEncoding metadata and this isn't it, skip to the next
319 if(fileEncodingOnly && !metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
320 continue;
321 }
322 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
323
324 // Ignore legacy crap
325 if (metadata_set_namespace.equals("hidden")) {
326 continue;
327 }
328
329 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
330 if (metadata_set == null) {
331 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
332 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
333 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
334 // Skip this element if we still don't have a loaded element for it
335 continue;
336 }
337
338 metadata_element_name_full = target_metadata_element_name_full;
339 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
340 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
341 }
342
343 MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(metadata_element_name_full);
344
345 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
346 // If the element doesn't exist in the metadata set, we're not interested
347 //Shaoqun modified. It needs to be added to metadata_set because the user might disable skim file
348 if (metadata_element == null) {
349 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
350 // continue;
351 }
352
353 // Square brackets need to be escaped because they are a special character in Greenstone
354 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
355 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
356 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
357
358 MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
359
360 // If there is no metadata value tree node for this value, create it
361 if (metadata_value_tree_node == null) {
362 DebugStream.println("Note: No value tree node for metadata value \"" + metadata_value_string + "\"");
363 metadata_element.addMetadataValue(metadata_value_string);
364 metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
365 }
366
367 MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node);
368 metadata_value.inheritsMetadataFromFolder(folder_metadata_inherited_from);
369 metadata_value.setIsOneFileOnlyMetadata(is_one_file_only_metadata);
370
371 // Is this accumulating metadata?
372 if (current_metadata_element.getAttribute("mode").equals("accumulate")) {
373 metadata_value.setIsAccumulatingMetadata(true);
374 }
375
376 // Add the new metadata value to the list
377 metadata_values.add(metadata_value);
378 }
379 }
380
381 return metadata_values;
382 }
383
384
385 public void removeMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
386 {
387 // If this metadata.xml file isn't the one currently loaded, load it now
388 if (loaded_file != this) {
389 // First we must save out the currently loaded file
390 saveLoadedFile();
391
392 // Parse the metadata.xml file
393 Document document = XMLTools.parseXMLFile(this);
394 if (document == null) {
395 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
396 return;
397 }
398
399 loaded_file = this;
400 loaded_file_document = document;
401
402 reEncodeFilenamesInMetadataXML(loaded_file_document);
403 }
404
405 // Determine the file's path relative to the location of the metadata.xml file
406 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
407 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
408 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
409 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
410 }
411
412 // Form a regular expression that specifies the scope of the metadata
413 String file_path_regexp;
414 if (file_relative_path.equals("")) {
415 // Special case for matching all files in the directory
416 file_path_regexp = DIRECTORY_FILENAME;
417 }
418 else {
419 // Convert the file path into a regular expression that will match it
420 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
421 }
422
423 // Find the appropriate FileSet element for this file
424 Element appropriate_fileset_element = null;
425
426 // Read all the FileSet elements in the file
427 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
428 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
429 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
430
431 // Check the FileName elements of the FileSet to see if we have a match
432 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
433 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
434 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
435 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
436
437 // Only exact matches can be extended with new metadata
438 if (current_filename_element_value.equals(file_path_regexp)) {
439 appropriate_fileset_element = current_fileset_element;
440 break;
441 }
442 }
443 }
444
445 // If no appropriate FileSet element exists the metadata isn't assigned in this metadata.xml file
446 if (appropriate_fileset_element == null) {
447 DebugStream.println("Note: No appropriate FileSet element found when removing metadata from " + this);
448 return;
449 }
450
451 // Remove each of the metadata values from the FileSet's Description element
452 for (int i = 0; i < metadata_values.size(); i++) {
453 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
454
455 // Remove any characters that are invalid in XML
456 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
457
458 // Square brackets need to be escaped because they are a special character in Greenstone
459 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
460 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
461
462 // Find the Metadata element to delete from the fileset
463 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
464 NodeList metadata_elements_nodelist = appropriate_fileset_element.getElementsByTagName(METADATA_ELEMENT);
465 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
466 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
467
468 // Check the metadata element name matches
469 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
470 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
471 // Check the metadata element value matches
472 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
473 if (current_metadata_value_string.equals(metadata_value_string)) {
474
475 // Remove this Metadata element
476 current_metadata_element.getParentNode().removeChild(current_metadata_element);
477
478 // the gs.filenameEncoding metadata is unique in that, when added, removed or
479 // changed, it must be applied on the file(name) whose metadata has been adjusted
480 if(current_metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
481
482 // metadata_value_string will hereafter be the inherited gs.FilenameEncoding
483 // metadata (if any), now that the value at this level has been removed
484 metadata_value_string = processFilenameEncoding(file_path_regexp,
485 file_node, "", true); // true only if *removing* this meta
486 }
487
488 // If there are no Metadata elements left now, remove the (empty) FileSet element
489 if (metadata_elements_nodelist.getLength() == 0) {
490 appropriate_fileset_element.getParentNode().removeChild(appropriate_fileset_element);
491 }
492
493 break;
494 }
495 }
496 }
497 }
498
499 // Remember that we've changed the file so it gets saved when a new one is loaded
500 loaded_file_changed = true;
501 }
502
503
504 public void replaceMetadata(CollectionTreeNode file_node, MetadataValue old_metadata_value, MetadataValue new_metadata_value)
505 {
506 // If this metadata.xml file isn't the one currently loaded, load it now
507 if (loaded_file != this) {
508 // First we must save out the currently loaded file
509 saveLoadedFile();
510
511 // Parse the metadata.xml file
512 Document document = XMLTools.parseXMLFile(this);
513 if (document == null) {
514 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
515 return;
516 }
517
518 loaded_file = this;
519 loaded_file_document = document;
520
521 reEncodeFilenamesInMetadataXML(loaded_file_document);
522 }
523
524 // Determine the file's path relative to the location of the metadata.xml file
525 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
526 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
527 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
528 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
529 }
530
531 // Form a regular expression that specifies the scope of the metadata
532 String file_path_regexp;
533 if (file_relative_path.equals("")) {
534 // Special case for matching all files in the directory
535 file_path_regexp = DIRECTORY_FILENAME;
536 }
537 else {
538 // Convert the file path into a regular expression that will match it
539 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
540 }
541
542 // Remove any characters that are invalid in XML
543 String old_metadata_value_string = XMLTools.removeInvalidCharacters(old_metadata_value.getFullValue());
544 String new_metadata_value_string = XMLTools.removeInvalidCharacters(new_metadata_value.getFullValue());
545
546 // Square brackets need to be escaped because they are a special character in Greenstone
547 old_metadata_value_string = old_metadata_value_string.replaceAll("\\[", "&#091;");
548 old_metadata_value_string = old_metadata_value_string.replaceAll("\\]", "&#093;");
549 new_metadata_value_string = new_metadata_value_string.replaceAll("\\[", "&#091;");
550 new_metadata_value_string = new_metadata_value_string.replaceAll("\\]", "&#093;");
551
552 // Read all the FileSet elements in the file
553 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
554 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
555 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
556 boolean current_fileset_matches = false;
557
558 // Check the FileName elements of the FileSet to see if we have a match
559 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
560 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
561 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
562 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
563
564 // Only exact matches can be edited
565 if (current_filename_element_value.equals(file_path_regexp)) {
566 current_fileset_matches = true;
567 break;
568 }
569 }
570
571 // The FileSet doesn't apply, so move onto the next one
572 if (current_fileset_matches == false) {
573 continue;
574 }
575
576 // Each metadata value is only allowed to be assigned once
577 boolean new_metadata_value_already_exists = false;
578 Element metadata_element_to_edit = null;
579
580 // Find the Metadata element to replace in the fileset
581 String metadata_element_name_full = old_metadata_value.getMetadataElement().getFullName();
582 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
583 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
584 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
585
586 // Check the metadata element name matches
587 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
588 if (!current_metadata_element_name_full.equals(metadata_element_name_full)) {
589 continue;
590 }
591
592 // Check the new metadata value doesn't already exist
593 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
594 if (current_metadata_value_string.equals(new_metadata_value_string)) {
595 new_metadata_value_already_exists = true;
596 }
597
598 // Check the metadata element value matches
599 if (current_metadata_value_string.equals(old_metadata_value_string)) {
600 metadata_element_to_edit = current_metadata_element;
601 }
602 }
603
604 // If the new metadata value already existed, remove the original value
605 if (new_metadata_value_already_exists) {
606 if(metadata_element_to_edit != null) { //?????????
607 metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit);
608 } else {
609 System.err.println("ERROR MetadataXMLFile: metadata_element_to_edit is null");
610 }
611 }
612 // Otherwise replace the old value with the new value
613 // Ensure metadata_element_to_edit isn't null (may occur when multiple files are selected)
614 else if (metadata_element_to_edit != null) {
615
616 // the gs.filenameEncoding metadata is unique in that, when added, removed or
617 // changed, it must be applied on the file(name) whose metadata has been adjusted
618 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
619 new_metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, new_metadata_value_string, false);
620 // true only if removing meta
621 }
622 XMLTools.setElementTextValue(metadata_element_to_edit, new_metadata_value_string);
623 }
624 }
625
626 // Remember that we've changed the file so it gets saved when a new one is loaded
627 loaded_file_changed = true;
628 }
629
630
631 static public void saveLoadedFile()
632 {
633 // If we have a file loaded into memory and it has been modified, save it now
634 if (loaded_file != null && loaded_file_changed == true) {
635 //System.err.println("START saveLoadedFile(), loaded_file_document:\n" + XMLTools.elementToString(loaded_file_document.getDocumentElement(), true));
636
637 XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements);
638
639 loaded_file_changed = false;
640 }
641 }
642
643 /**
644 * parseXML(metadata.xml) has the side-effect of resolving html entities.
645 * Although this is not done by the GLIEntityResolver usage in parseXML(), something
646 * in parseXML() is resolving the html entities, including those used in carefully
647 * html-entity-escaped filenames.
648 * We need to get the filenames in the DOM correct after parsing a metadata.xml file
649 * into memory, so that we have the correct filenames and so that we'll write it out correctly.
650 * Therefore, always call this method after a successful parseXML() call on a metadata.xml.
651 * @param doc is the Document where the FILENAME_ELEMENTs need to be re-encoded.
652 * At the end of this function, the doc will be modified with the re-encoded filenames.
653 *
654 */
655 static private void reEncodeFilenamesInMetadataXML(Document doc) {
656 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
657 return;
658 }
659
660 String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding("."); // returns the curr dir path after removing the /./ at end
661 //System.err.println("@@@ curr_directory_path: " + curr_directory_path);
662
663 //System.err.println("PARSED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
664
665 // Read all the FileSet elements in the file
666 NodeList fileset_elements_nodelist = doc.getElementsByTagName(FILESET_ELEMENT);
667 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
668 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
669
670 // get the value of all FileName elements
671 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
672 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
673 Element filename_element = (Element) filename_elements_nodelist.item(j);
674 String filename = XMLTools.getElementTextValue(filename_element);
675 if(!filename.equals(DIRECTORY_FILENAME)) {
676 // Reencode filename after parseXML() had the side-effect of decoding entities in filename elements
677
678 //System.err.println("Filename before reencoding was: " + filename);
679
680 // Can't convert to URI with backslash-escaped chars (backslash used in regexed filename are illegal in URI object
681 // created by filenameToURLEncoding). So replace backslashes in regex with url-encoded hex-value of backslash, %5C.
682 String encoded_filename = filename.replace("\\", "%5C");
683
684 // get the URL encoded filename preserving special encodings, with any curr_directory_path prefix removed
685 encoded_filename = FilenameEncoding.filenameToURLEncodingWithPrefixRemoved(encoded_filename, curr_directory_path);
686
687 // Reintrodudce the backslash characters in place of their %5C hex placeholders
688 encoded_filename = encoded_filename.replace("%5C", "\\");
689
690 // Update filename element in DOM
691 XMLTools.setElementTextValue(filename_element, encoded_filename);
692 //System.err.println("Filename after reencoding was: " + encoded_filename);
693 }
694 }
695 }
696 //System.err.println("RE-ENCODED loaded_file contains:\n" + XMLTools.elementToString(doc.getDocumentElement(), true));
697 }
698
699 /**
700 * Every metadata.xml file must be skimmed when a collection is opened, for three very important reasons:
701 * - To handle any non-namespaced metadata in the metadata.xml files (this is mapped and the files rewritten)
702 * - To get a complete list of the metadata elements in the collection (used in Design and Format panes)
703 * - To build complete and accurate metadata value trees (used in the Enrich pane)
704 */
705 public void skimFile()
706 {
707 boolean file_changed = false;
708
709 // Parse the metadata.xml file
710 DebugStream.println("Skimming metadata.xml file " + this + "...");
711
712 Document document = XMLTools.parseXMLFile(this);
713 if (document == null) {
714 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
715 return;
716 }
717 // Always call this method after calling parseXMLFile
718 reEncodeFilenamesInMetadataXML(document);
719
720 // Read all the Metadata elements in the file
721 HashMap target_metadata_element_name_attrs_cache = new HashMap();
722 NodeList metadata_elements_nodelist = document.getElementsByTagName(METADATA_ELEMENT);
723 for (int i = 0; i < metadata_elements_nodelist.getLength(); i++) {
724 Element current_metadata_element = (Element) metadata_elements_nodelist.item(i);
725 String metadata_element_name_full = current_metadata_element.getAttribute("name");
726 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
727
728 // Ignore legacy crap
729 if (metadata_set_namespace.equals("hidden")) {
730 continue;
731 }
732
733 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
734 if (metadata_set == null) {
735 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
736 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
737 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
738 // Skip this element if we still don't have a loaded element for it
739 continue;
740 }
741
742 // Update the metadata.xml file to have the new (namespaced) element name
743 // Instead of using current_metadata_element.setAttribute("name", target_metadata_element_name_full)
744 // we create an Attr object for each target metadata element name, and cache them
745 // This makes a *huge* difference (namespacing a metadata.xml file with 45000 metadata entries now
746 // takes 45 seconds instead of 30 minutes!) -- why is setting the value of a Node so slow?
747 Attr target_metadata_element_name_attr = (Attr) target_metadata_element_name_attrs_cache.get(target_metadata_element_name_full);
748 if (target_metadata_element_name_attr == null) {
749 target_metadata_element_name_attr = document.createAttribute("name");
750 target_metadata_element_name_attr.setValue(target_metadata_element_name_full);
751 target_metadata_element_name_attrs_cache.put(target_metadata_element_name_full, target_metadata_element_name_attr);
752 }
753
754 // Remove the old name attribute and add the new (namespaced) one
755 current_metadata_element.removeAttribute("name");
756 current_metadata_element.setAttributeNode((Attr) target_metadata_element_name_attr.cloneNode(false));
757 file_changed = true;
758
759 metadata_element_name_full = target_metadata_element_name_full;
760 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
761 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
762 }
763
764 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
765 MetadataElement metadata_element = metadata_set.getMetadataElementWithName(metadata_element_name);
766
767 // If the element doesn't exist in the metadata set, add it
768 if (metadata_element == null) {
769 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
770 }
771
772 // Square brackets need to be escaped because they are a special character in Greenstone
773 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
774 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
775 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
776
777 metadata_element.addMetadataValue(metadata_value_string);
778 }
779
780 // Rewrite the metadata.xml file if it has changed
781 if (file_changed) {
782 XMLTools.writeXMLFile(this, document);
783 }
784 }
785
786 /**
787 * The gs.filenameEncoding metadata is unique in that, when added, removed or
788 * replaced, it must be applied on the file(name) whose metadata has been
789 * adjusted.
790 * This method handles all that, given the regular expression or filepath name
791 * to match on (.* matches subdirectories), the affected fileNode, the new
792 * encoding value and whether a new encoding value has been added/an existing
793 * one has been replaced or whether the encoding metadata has been removed.
794 * The new adjusted value for the encoding metadata is returned.
795 *
796 * MetadataXMLFileManager maintains a hashmap of (URL-encoded filepaths, encoding)
797 * to allow fast access to previously assigned gs.filenameEncoding metadata (if
798 * any) for each file. This hashmap also needs to be updated, but this update
799 * is complicated by the fact that it concerns regular expressions that could
800 * affect multiple filenames.
801 */
802 public String processFilenameEncoding(String file_path_regexp, CollectionTreeNode file_node,
803 String encoding_metadata_value, boolean removingMetadata)
804 {
805 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
806 return encoding_metadata_value;
807 }
808
809 // Work out this filenode's new encoding and apply it:
810
811 if(removingMetadata) { // encoding_metadata_value = ""
812 // gs.filenameEncoding metadata being removed, work out
813 // any inherited metadata to replace it with in the meta-table
814 encoding_metadata_value = FilenameEncoding.getInheritedFilenameEncoding(
815 file_node.getURLEncodedFilePath(), file_node.getFile());
816 // should be canonical encoding already
817 }
818 else if(!encoding_metadata_value.equals("")) {
819 // if adding or replacing filename encoding,
820 // get the canonical encoding name for this alias
821 encoding_metadata_value = FilenameEncoding.canonicalEncodingName(encoding_metadata_value);
822 }
823 // Reencode the display of this filenode only as any affected
824 // childnodes will be reencoded on FileNode.refreshDescendantEncodings()
825 file_node.reencodeDisplayName(encoding_metadata_value);
826
827
828 // Whether removing or adding/replacing the file's gs.filename encoding meta,
829 // store this in the file-to-encoding map for fast access, since the map stores
830 // empty string values when no meta has been assigned at this file level.
831 // In the case of removingMetadata, the value stored will be the fallback value
832
833 String urlpath = file_node.getURLEncodedFilePath();
834 if(removingMetadata) {
835 // remove it from the map instead of inserting "", so that when folders in the collectiontree
836 // are being deleted or shifted, the removemetada (and addmetadata) calls that get fired
837 // for each affected filenodes does not cause the undesirable effect of multiple "" to be
838 // entered into the filename-to-encoding map for filepaths that no longer exist .
839 FilenameEncoding.map.remove(urlpath);
840 } else { // for adding and replacing, put the encoding into the map (also replaces any existing encoding for it)
841 FilenameEncoding.map.put(urlpath, encoding_metadata_value);
842 }
843
844 // If new folder-level metadata (or metadata for a set of files fitting a pattern) has been
845 // assigned, the file_to_encodings map will be cleared for all descendant folders and files,
846 // so that these can be re-calculated upon refreshing the visible parts of the CollectionTree.
847 // Mark the state as requiring a refresh of the CollectionTree.
848 // This next step also serves to prevent the MetadataValueTableModel from trying to update
849 // itself while a refresh (involving re-encoding of filenames of visible nodes) is in progress.
850 FilenameEncoding.setRefreshRequired(true);
851
852 return encoding_metadata_value;
853 }
854}
Note: See TracBrowser for help on using the repository browser.