source: main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java@ 31814

Last change on this file since 31814 was 29793, checked in by ak19, 9 years ago

Some more changes to make adding metadata to filenames with non-Latin-1 characters work. Previously, any character above 127 was being output as %XX. This doesn't work, as higher values can end up like %101, but when decoding we only ever look for 2 digits after the %. So for higher values we'll use entities like &#x101; instead. We then had to modify the XML writing to not escape the text in the FileName element - otherwise we get &amp;#x101;

  • Property svn:keywords set to Author Date Id Revision
File size: 35.5 KB
Line 
1/**
2 *############################################################################
3 * A component of the Greenstone Librarian Interface, part of the Greenstone
4 * digital library suite from the New Zealand Digital Library Project at the
5 * University of Waikato, New Zealand.
6 *
7 * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ
8 *
9 * Copyright (C) 2004 New Zealand Digital Library Project
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *############################################################################
25 */
26
27package org.greenstone.gatherer.metadata;
28
29
30import java.io.*;
31import java.util.*;
32import org.greenstone.gatherer.DebugStream;
33import org.greenstone.gatherer.collection.CollectionTreeNode;
34import org.greenstone.gatherer.util.XMLTools;
35import org.w3c.dom.*;
36
37
38/** This class represents one metadata.xml file */
39public class MetadataXMLFile
40 extends File
41{
42 static final private String DESCRIPTION_ELEMENT = "Description";
43 static final private String DIRECTORY_FILENAME = ".*";
44 static final private String FILENAME_ELEMENT = "FileName";
45 static final private String FILESET_ELEMENT = "FileSet";
46 static final private String METADATA_ELEMENT = "Metadata";
47 static final private String[] nonEscapingElements = new String[]{FILENAME_ELEMENT};
48
49 /** Special metadata field: the filename encoding is a unique sort of metadata in
50 * that it is not just information stored with a collection file, but also needs to
51 * be applied in real-time to the collection file (to its filename) for display. */
52 static final public String FILENAME_ENCODING_METADATA = "gs.filenameEncoding";
53
54 // To speed things up a bit we keep the last accessed metadata.xml file in memory
55 static private File loaded_file = null;
56 static private Document loaded_file_document = null;
57 static private boolean loaded_file_changed = false;
58
59
60 public MetadataXMLFile(String metadata_xml_file_path)
61 {
62 super(metadata_xml_file_path);
63 }
64
65
66 public void addMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
67 {
68 // If this metadata.xml file isn't the one currently loaded, load it now
69 if (loaded_file != this) {
70 // First we must save out the currently loaded file
71 saveLoadedFile();
72
73 // Parse the metadata.xml file
74 Document document = XMLTools.parseXMLFile(this);
75 if (document == null) {
76 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
77 return;
78 }
79
80 loaded_file = this;
81 loaded_file_document = document;
82 }
83
84 // Determine the file's path relative to the location of the metadata.xml file
85 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
86 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
87 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
88 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
89 }
90
91 // Form a regular expression that specifies the scope of the metadata
92 String file_path_regexp;
93 if (file_relative_path.equals("")) {
94 // Special case for matching all files in the directory
95 file_path_regexp = DIRECTORY_FILENAME;
96 }
97 else {
98 // Convert the file path into a regular expression that will match it
99 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
100 }
101
102 // Find the appropriate FileSet element for this file
103 Element appropriate_fileset_element = null;
104
105 // Read all the FileSet elements in the file
106 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
107 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
108 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
109
110 // Check the FileName elements of the FileSet to see if we have a match
111 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
112 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
113 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
114 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
115
116 // Only exact matches can be extended with new metadata
117 if (current_filename_element_value.equals(file_path_regexp)) {
118 appropriate_fileset_element = current_fileset_element;
119 break;
120 }
121 }
122 }
123
124 // If no appropriate FileSet element exists create a new one for this file
125 if (appropriate_fileset_element == null) {
126 DebugStream.println("Creating new FileSet element for file since none exists..."+file_path_regexp);
127 appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT);
128
129 Element new_filename_element = loaded_file_document.createElement(FILENAME_ELEMENT);
130 new_filename_element.appendChild(loaded_file_document.createTextNode(file_path_regexp));
131 appropriate_fileset_element.appendChild(new_filename_element);
132
133 Element new_description_element = loaded_file_document.createElement(DESCRIPTION_ELEMENT);
134 appropriate_fileset_element.appendChild(new_description_element);
135
136 // add the fileset element for .* at the top: especially important for
137 // non-accumulating (and override mode) meta. Other type fileset elements can be appended
138 if(file_path_regexp.equals(DIRECTORY_FILENAME)) {
139 loaded_file_document.getDocumentElement().insertBefore(appropriate_fileset_element,
140 loaded_file_document.getDocumentElement().getFirstChild());
141 } else {
142 loaded_file_document.getDocumentElement().appendChild(appropriate_fileset_element);
143 }
144 }
145
146 // Add each of the metadata values to the FileSet's Description element
147 Element description_element = (Element) appropriate_fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0);
148 for (int i = 0; i < metadata_values.size(); i++) {
149 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
150 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
151
152 // Remove any characters that are invalid in XML
153 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
154
155 // Square brackets need to be escaped because they are a special character in Greenstone
156 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
157 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
158
159 // the gs.filenameEncoding metadata is unique in that, when added, removed or
160 // changed, it must be applied on the file(name) whose metadata has been adjusted
161 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
162 metadata_value_string = processFilenameEncoding(file_path_regexp,
163 file_node, metadata_value_string, false);
164 // true only if removing meta
165 }
166
167 // Check if this piece of metadata has already been assigned to this FileSet element
168 boolean metadata_already_assigned = false;
169 NodeList metadata_elements_nodelist = description_element.getElementsByTagName(METADATA_ELEMENT);
170 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
171 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
172
173 // Check if the metadata element name matches
174 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
175 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
176 // if the metadata must not accumulate, then edit the current value
177 if (!metadata_value.isAccumulatingMetadata()) {
178 XMLTools.setNodeText(current_metadata_element, metadata_value_string);
179 metadata_already_assigned = true;
180 break;
181 }
182 // Check if the metadata element value matches
183 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
184 if (current_metadata_value_string.equals(metadata_value_string)) {
185 // Metadata already assigned
186 metadata_already_assigned = true;
187 break;
188 }
189 }
190 }
191
192 // If the piece of metadata hasn't already been assigned, add it now
193 if (!metadata_already_assigned) {
194 // Create a new Metadata element to record this metadata
195 Element new_metadata_element = loaded_file_document.createElement(METADATA_ELEMENT);
196 new_metadata_element.setAttribute("name", metadata_value.getMetadataElement().getFullName());
197 new_metadata_element.setAttribute("mode", (metadata_value.isAccumulatingMetadata() ? "accumulate" : "override"));
198 new_metadata_element.appendChild(loaded_file_document.createTextNode(metadata_value_string));
199
200 // Accumulating metadata: add at the end
201 if (metadata_value.isAccumulatingMetadata()) {
202 description_element.appendChild(new_metadata_element);
203 }
204 // Override metadata: add at the start (so it overrides inherited metadata without affecting other assigned metadata)
205 else {
206 description_element.insertBefore(new_metadata_element, description_element.getFirstChild());
207 }
208 }
209 }
210
211 // Remember that we've changed the file so it gets saved when a new one is loaded
212 loaded_file_changed = true;
213 }
214
215
216 public ArrayList getMetadataAssignedToFile(File file, boolean fileEncodingOnly)
217 {
218 // If this metadata.xml file isn't the one currently loaded, load it now
219 if (loaded_file != this) {
220 // First we must save out the currently loaded file
221 saveLoadedFile();
222
223 // Parse the metadata.xml file
224 Document document = XMLTools.parseXMLFile(this);
225 if (document == null) {
226 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
227 return new ArrayList();
228 }
229
230 loaded_file = this;
231 loaded_file_document = document;
232 }
233
234 // Determine the file's path relative to the location of the metadata.xml file
235 String file_relative_path = FilenameEncoding.fileToURLEncoding(file);
236 File metadata_xml_file_directory = getParentFile();
237 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(metadata_xml_file_directory);
238 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
239
240 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
241 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
242 }
243
244 // Build up a list of metadata assigned to this file
245 ArrayList metadata_values = new ArrayList();
246
247 // Read all the FileSet elements in the file
248 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
249 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
250 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
251 boolean current_fileset_matches = false;
252 boolean is_one_file_only_metadata = true;
253 File folder_metadata_inherited_from = null;
254
255 // Check the FileName elements of the FileSet to see if we have a match
256 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
257 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
258 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
259 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
260
261 // Does this fileset specify metadata for one file only?
262 is_one_file_only_metadata = true;
263 if (current_filename_element_value.indexOf("*") != -1 && !current_filename_element_value.equals(DIRECTORY_FILENAME)) {
264 // No, it specifies metadata for multiple files (but not all the files in the directory)
265 is_one_file_only_metadata = false;
266 }
267
268 // This fileset specifies metadata for the file
269 if (file_relative_path.matches(current_filename_element_value)) {
270 current_fileset_matches = true;
271 if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) {
272 folder_metadata_inherited_from = metadata_xml_file_directory;
273 }
274 break;
275 }
276
277 // This fileset specifies metadata for the folder the file is in
278 if (file_relative_path.startsWith(current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) {
279 current_fileset_matches = true;
280 folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value);
281 break;
282 }
283 }
284
285 // The FileSet doesn't apply, so move onto the next one
286 if (current_fileset_matches == false) {
287 continue;
288 }
289
290 // Read all the Metadata elements in the fileset
291 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
292 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
293 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
294 String metadata_element_name_full = current_metadata_element.getAttribute("name");
295 // if we're only looking for fileEncoding metadata and this isn't it, skip to the next
296 if(fileEncodingOnly && !metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
297 continue;
298 }
299 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
300
301 // Ignore legacy crap
302 if (metadata_set_namespace.equals("hidden")) {
303 continue;
304 }
305
306 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
307 if (metadata_set == null) {
308 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
309 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
310 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
311 // Skip this element if we still don't have a loaded element for it
312 continue;
313 }
314
315 metadata_element_name_full = target_metadata_element_name_full;
316 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
317 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
318 }
319
320 MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(metadata_element_name_full);
321
322 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
323 // If the element doesn't exist in the metadata set, we're not interested
324 //Shaoqun modified. It needs to be added to metadata_set because the user might disable skim file
325 if (metadata_element == null) {
326 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
327 // continue;
328 }
329
330 // Square brackets need to be escaped because they are a special character in Greenstone
331 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
332 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
333 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
334
335 MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
336
337 // If there is no metadata value tree node for this value, create it
338 if (metadata_value_tree_node == null) {
339 DebugStream.println("Note: No value tree node for metadata value \"" + metadata_value_string + "\"");
340 metadata_element.addMetadataValue(metadata_value_string);
341 metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
342 }
343
344 MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node);
345 metadata_value.inheritsMetadataFromFolder(folder_metadata_inherited_from);
346 metadata_value.setIsOneFileOnlyMetadata(is_one_file_only_metadata);
347
348 // Is this accumulating metadata?
349 if (current_metadata_element.getAttribute("mode").equals("accumulate")) {
350 metadata_value.setIsAccumulatingMetadata(true);
351 }
352
353 // Add the new metadata value to the list
354 metadata_values.add(metadata_value);
355 }
356 }
357
358 return metadata_values;
359 }
360
361
362 public void removeMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
363 {
364 // If this metadata.xml file isn't the one currently loaded, load it now
365 if (loaded_file != this) {
366 // First we must save out the currently loaded file
367 saveLoadedFile();
368
369 // Parse the metadata.xml file
370 Document document = XMLTools.parseXMLFile(this);
371 if (document == null) {
372 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
373 return;
374 }
375
376 loaded_file = this;
377 loaded_file_document = document;
378 }
379
380 // Determine the file's path relative to the location of the metadata.xml file
381 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
382 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
383 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
384 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
385 }
386
387 // Form a regular expression that specifies the scope of the metadata
388 String file_path_regexp;
389 if (file_relative_path.equals("")) {
390 // Special case for matching all files in the directory
391 file_path_regexp = DIRECTORY_FILENAME;
392 }
393 else {
394 // Convert the file path into a regular expression that will match it
395 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
396 }
397
398 // Find the appropriate FileSet element for this file
399 Element appropriate_fileset_element = null;
400
401 // Read all the FileSet elements in the file
402 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
403 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
404 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
405
406 // Check the FileName elements of the FileSet to see if we have a match
407 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
408 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
409 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
410 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
411
412 // Only exact matches can be extended with new metadata
413 if (current_filename_element_value.equals(file_path_regexp)) {
414 appropriate_fileset_element = current_fileset_element;
415 break;
416 }
417 }
418 }
419
420 // If no appropriate FileSet element exists the metadata isn't assigned in this metadata.xml file
421 if (appropriate_fileset_element == null) {
422 DebugStream.println("Note: No appropriate FileSet element found when removing metadata from " + this);
423 return;
424 }
425
426 // Remove each of the metadata values from the FileSet's Description element
427 for (int i = 0; i < metadata_values.size(); i++) {
428 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
429
430 // Remove any characters that are invalid in XML
431 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
432
433 // Square brackets need to be escaped because they are a special character in Greenstone
434 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
435 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
436
437 // Find the Metadata element to delete from the fileset
438 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
439 NodeList metadata_elements_nodelist = appropriate_fileset_element.getElementsByTagName(METADATA_ELEMENT);
440 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
441 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
442
443 // Check the metadata element name matches
444 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
445 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
446 // Check the metadata element value matches
447 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
448 if (current_metadata_value_string.equals(metadata_value_string)) {
449
450 // Remove this Metadata element
451 current_metadata_element.getParentNode().removeChild(current_metadata_element);
452
453 // the gs.filenameEncoding metadata is unique in that, when added, removed or
454 // changed, it must be applied on the file(name) whose metadata has been adjusted
455 if(current_metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
456
457 // metadata_value_string will hereafter be the inherited gs.FilenameEncoding
458 // metadata (if any), now that the value at this level has been removed
459 metadata_value_string = processFilenameEncoding(file_path_regexp,
460 file_node, "", true); // true only if *removing* this meta
461 }
462
463 // If there are no Metadata elements left now, remove the (empty) FileSet element
464 if (metadata_elements_nodelist.getLength() == 0) {
465 appropriate_fileset_element.getParentNode().removeChild(appropriate_fileset_element);
466 }
467
468 break;
469 }
470 }
471 }
472 }
473
474 // Remember that we've changed the file so it gets saved when a new one is loaded
475 loaded_file_changed = true;
476 }
477
478
479 public void replaceMetadata(CollectionTreeNode file_node, MetadataValue old_metadata_value, MetadataValue new_metadata_value)
480 {
481 // If this metadata.xml file isn't the one currently loaded, load it now
482 if (loaded_file != this) {
483 // First we must save out the currently loaded file
484 saveLoadedFile();
485
486 // Parse the metadata.xml file
487 Document document = XMLTools.parseXMLFile(this);
488 if (document == null) {
489 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
490 return;
491 }
492
493 loaded_file = this;
494 loaded_file_document = document;
495 }
496
497 // Determine the file's path relative to the location of the metadata.xml file
498 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
499 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
500 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
501 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
502 }
503
504 // Form a regular expression that specifies the scope of the metadata
505 String file_path_regexp;
506 if (file_relative_path.equals("")) {
507 // Special case for matching all files in the directory
508 file_path_regexp = DIRECTORY_FILENAME;
509 }
510 else {
511 // Convert the file path into a regular expression that will match it
512 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
513 }
514
515 // Remove any characters that are invalid in XML
516 String old_metadata_value_string = XMLTools.removeInvalidCharacters(old_metadata_value.getFullValue());
517 String new_metadata_value_string = XMLTools.removeInvalidCharacters(new_metadata_value.getFullValue());
518
519 // Square brackets need to be escaped because they are a special character in Greenstone
520 old_metadata_value_string = old_metadata_value_string.replaceAll("\\[", "&#091;");
521 old_metadata_value_string = old_metadata_value_string.replaceAll("\\]", "&#093;");
522 new_metadata_value_string = new_metadata_value_string.replaceAll("\\[", "&#091;");
523 new_metadata_value_string = new_metadata_value_string.replaceAll("\\]", "&#093;");
524
525 // Read all the FileSet elements in the file
526 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
527 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
528 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
529 boolean current_fileset_matches = false;
530
531 // Check the FileName elements of the FileSet to see if we have a match
532 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
533 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
534 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
535 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
536
537 // Only exact matches can be edited
538 if (current_filename_element_value.equals(file_path_regexp)) {
539 current_fileset_matches = true;
540 break;
541 }
542 }
543
544 // The FileSet doesn't apply, so move onto the next one
545 if (current_fileset_matches == false) {
546 continue;
547 }
548
549 // Each metadata value is only allowed to be assigned once
550 boolean new_metadata_value_already_exists = false;
551 Element metadata_element_to_edit = null;
552
553 // Find the Metadata element to replace in the fileset
554 String metadata_element_name_full = old_metadata_value.getMetadataElement().getFullName();
555 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
556 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
557 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
558
559 // Check the metadata element name matches
560 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
561 if (!current_metadata_element_name_full.equals(metadata_element_name_full)) {
562 continue;
563 }
564
565 // Check the new metadata value doesn't already exist
566 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
567 if (current_metadata_value_string.equals(new_metadata_value_string)) {
568 new_metadata_value_already_exists = true;
569 }
570
571 // Check the metadata element value matches
572 if (current_metadata_value_string.equals(old_metadata_value_string)) {
573 metadata_element_to_edit = current_metadata_element;
574 }
575 }
576
577 // If the new metadata value already existed, remove the original value
578 if (new_metadata_value_already_exists) {
579 if(metadata_element_to_edit != null) { //?????????
580 metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit);
581 } else {
582 System.err.println("ERROR MetadataXMLFile: metadata_element_to_edit is null");
583 }
584 }
585 // Otherwise replace the old value with the new value
586 // Ensure metadata_element_to_edit isn't null (may occur when multiple files are selected)
587 else if (metadata_element_to_edit != null) {
588
589 // the gs.filenameEncoding metadata is unique in that, when added, removed or
590 // changed, it must be applied on the file(name) whose metadata has been adjusted
591 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
592 new_metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, new_metadata_value_string, false);
593 // true only if removing meta
594 }
595 XMLTools.setElementTextValue(metadata_element_to_edit, new_metadata_value_string);
596 }
597 }
598
599 // Remember that we've changed the file so it gets saved when a new one is loaded
600 loaded_file_changed = true;
601 }
602
603
604 static public void saveLoadedFile()
605 {
606 // If we have a file loaded into memory and it has been modified, save it now
607 if (loaded_file != null && loaded_file_changed == true) {
608 XMLTools.writeXMLFile(loaded_file, loaded_file_document, nonEscapingElements);
609
610
611 loaded_file_changed = false;
612 }
613 }
614
615
616 /**
617 * Every metadata.xml file must be skimmed when a collection is opened, for three very important reasons:
618 * - To handle any non-namespaced metadata in the metadata.xml files (this is mapped and the files rewritten)
619 * - To get a complete list of the metadata elements in the collection (used in Design and Format panes)
620 * - To build complete and accurate metadata value trees (used in the Enrich pane)
621 */
622 public void skimFile()
623 {
624 boolean file_changed = false;
625
626 // Parse the metadata.xml file
627 DebugStream.println("Skimming metadata.xml file " + this + "...");
628
629 Document document = XMLTools.parseXMLFile(this);
630 if (document == null) {
631 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
632 return;
633 }
634
635 // Read all the Metadata elements in the file
636 HashMap target_metadata_element_name_attrs_cache = new HashMap();
637 NodeList metadata_elements_nodelist = document.getElementsByTagName(METADATA_ELEMENT);
638 for (int i = 0; i < metadata_elements_nodelist.getLength(); i++) {
639 Element current_metadata_element = (Element) metadata_elements_nodelist.item(i);
640 String metadata_element_name_full = current_metadata_element.getAttribute("name");
641 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
642
643 // Ignore legacy crap
644 if (metadata_set_namespace.equals("hidden")) {
645 continue;
646 }
647
648 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
649 if (metadata_set == null) {
650 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
651 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
652 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
653 // Skip this element if we still don't have a loaded element for it
654 continue;
655 }
656
657 // Update the metadata.xml file to have the new (namespaced) element name
658 // Instead of using current_metadata_element.setAttribute("name", target_metadata_element_name_full)
659 // we create an Attr object for each target metadata element name, and cache them
660 // This makes a *huge* difference (namespacing a metadata.xml file with 45000 metadata entries now
661 // takes 45 seconds instead of 30 minutes!) -- why is setting the value of a Node so slow?
662 Attr target_metadata_element_name_attr = (Attr) target_metadata_element_name_attrs_cache.get(target_metadata_element_name_full);
663 if (target_metadata_element_name_attr == null) {
664 target_metadata_element_name_attr = document.createAttribute("name");
665 target_metadata_element_name_attr.setValue(target_metadata_element_name_full);
666 target_metadata_element_name_attrs_cache.put(target_metadata_element_name_full, target_metadata_element_name_attr);
667 }
668
669 // Remove the old name attribute and add the new (namespaced) one
670 current_metadata_element.removeAttribute("name");
671 current_metadata_element.setAttributeNode((Attr) target_metadata_element_name_attr.cloneNode(false));
672 file_changed = true;
673
674 metadata_element_name_full = target_metadata_element_name_full;
675 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
676 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
677 }
678
679 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
680 MetadataElement metadata_element = metadata_set.getMetadataElementWithName(metadata_element_name);
681
682 // If the element doesn't exist in the metadata set, add it
683 if (metadata_element == null) {
684 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
685 }
686
687 // Square brackets need to be escaped because they are a special character in Greenstone
688 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
689 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
690 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
691
692 metadata_element.addMetadataValue(metadata_value_string);
693 }
694
695 // Rewrite the metadata.xml file if it has changed
696 if (file_changed) {
697 XMLTools.writeXMLFile(this, document);
698 }
699 }
700
701 /**
702 * The gs.filenameEncoding metadata is unique in that, when added, removed or
703 * replaced, it must be applied on the file(name) whose metadata has been
704 * adjusted.
705 * This method handles all that, given the regular expression or filepath name
706 * to match on (.* matches subdirectories), the affected fileNode, the new
707 * encoding value and whether a new encoding value has been added/an existing
708 * one has been replaced or whether the encoding metadata has been removed.
709 * The new adjusted value for the encoding metadata is returned.
710 *
711 * MetadataXMLFileManager maintains a hashmap of (URL-encoded filepaths, encoding)
712 * to allow fast access to previously assigned gs.filenameEncoding metadata (if
713 * any) for each file. This hashmap also needs to be updated, but this update
714 * is complicated by the fact that it concerns regular expressions that could
715 * affect multiple filenames.
716 */
717 public String processFilenameEncoding(String file_path_regexp, CollectionTreeNode file_node,
718 String encoding_metadata_value, boolean removingMetadata)
719 {
720 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
721 return encoding_metadata_value;
722 }
723
724 // Work out this filenode's new encoding and apply it:
725
726 if(removingMetadata) { // encoding_metadata_value = ""
727 // gs.filenameEncoding metadata being removed, work out
728 // any inherited metadata to replace it with in the meta-table
729 encoding_metadata_value = FilenameEncoding.getInheritedFilenameEncoding(
730 file_node.getURLEncodedFilePath(), file_node.getFile());
731 // should be canonical encoding already
732 }
733 else if(!encoding_metadata_value.equals("")) {
734 // if adding or replacing filename encoding,
735 // get the canonical encoding name for this alias
736 encoding_metadata_value = FilenameEncoding.canonicalEncodingName(encoding_metadata_value);
737 }
738 // Reencode the display of this filenode only as any affected
739 // childnodes will be reencoded on FileNode.refreshDescendantEncodings()
740 file_node.reencodeDisplayName(encoding_metadata_value);
741
742
743 // Whether removing or adding/replacing the file's gs.filename encoding meta,
744 // store this in the file-to-encoding map for fast access, since the map stores
745 // empty string values when no meta has been assigned at this file level.
746 // In the case of removingMetadata, the value stored will be the fallback value
747
748 String urlpath = file_node.getURLEncodedFilePath();
749 if(removingMetadata) {
750 // remove it from the map instead of inserting "", so that when folders in the collectiontree
751 // are being deleted or shifted, the removemetada (and addmetadata) calls that get fired
752 // for each affected filenodes does not cause the undesirable effect of multiple "" to be
753 // entered into the filename-to-encoding map for filepaths that no longer exist .
754 FilenameEncoding.map.remove(urlpath);
755 } else { // for adding and replacing, put the encoding into the map (also replaces any existing encoding for it)
756 FilenameEncoding.map.put(urlpath, encoding_metadata_value);
757 }
758
759 // If new folder-level metadata (or metadata for a set of files fitting a pattern) has been
760 // assigned, the file_to_encodings map will be cleared for all descendant folders and files,
761 // so that these can be re-calculated upon refreshing the visible parts of the CollectionTree.
762 // Mark the state as requiring a refresh of the CollectionTree.
763 // This next step also serves to prevent the MetadataValueTableModel from trying to update
764 // itself while a refresh (involving re-encoding of filenames of visible nodes) is in progress.
765 FilenameEncoding.setRefreshRequired(true);
766
767 return encoding_metadata_value;
768 }
769}
Note: See TracBrowser for help on using the repository browser.