source: main/trunk/gli/src/org/greenstone/gatherer/metadata/MetadataXMLFile.java@ 23433

Last change on this file since 23433 was 23433, checked in by ak19, 13 years ago

GLI now has a gs.FilenameEncoding metadata field which appears like all the others in GLI's EnrichPane, but is unique in that this metadata (once set, changed or removed) must be applied to the affected filenames in the Collection Tree. More importantly, the changes made for this are to allow GLI's java code to interact with the recent changes to Perl where strings were made unicode-aware (for proper regex matching) but which required other changes elsewhere. To still support filenames with different encodings Perl used URL encoded versions of filenames representing characters' code point values in URL encoding. This required that GLI write out URL encoded filenames to the metadata.xml files that are associated with each folder level of a collection, so that Perl can read them. In this way, they can both speak of the same filenames. Only works on unicode 16 (such as latin-1), non-UTF8 systems. The latter is a requirement since Java uses the filesystem encoding from startup. If it is UTF8, non-recognised characters are replaced by the invalid char for UTF8. This process being destructive, we can't get the original filenames' bytecodes back. The changes made to GLI will work on Windows which is UTF-16 (windows codepage 1252), presumably also Macs (some kind of UTF-16) and also works on Native Latin 1 Linux systems. UTF-8 Linux systems need to be reconfigured to Native Latin-1, or if not installed, an administrator can install it easily.

  • Property svn:keywords set to Author Date Id Revision
File size: 35.3 KB
Line 
1/**
2 *############################################################################
3 * A component of the Greenstone Librarian Interface, part of the Greenstone
4 * digital library suite from the New Zealand Digital Library Project at the
5 * University of Waikato, New Zealand.
6 *
7 * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ
8 *
9 * Copyright (C) 2004 New Zealand Digital Library Project
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *############################################################################
25 */
26
27package org.greenstone.gatherer.metadata;
28
29
30import java.io.*;
31import java.util.*;
32import org.greenstone.gatherer.DebugStream;
33import org.greenstone.gatherer.collection.CollectionTreeNode;
34import org.greenstone.gatherer.util.XMLTools;
35import org.w3c.dom.*;
36
37
38/** This class represents one metadata.xml file */
39public class MetadataXMLFile
40 extends File
41{
42 static final private String DESCRIPTION_ELEMENT = "Description";
43 static final private String DIRECTORY_FILENAME = ".*";
44 static final private String FILENAME_ELEMENT = "FileName";
45 static final private String FILESET_ELEMENT = "FileSet";
46 static final private String METADATA_ELEMENT = "Metadata";
47
48 /** Special metadata field: the filename encoding is a unique sort of metadata in
49 * that it is not just information stored with a collection file, but also needs to
50 * be applied in real-time to the collection file (to its filename) for display. */
51 static final public String FILENAME_ENCODING_METADATA = "gs.filenameEncoding";
52
53 // To speed things up a bit we keep the last accessed metadata.xml file in memory
54 static private File loaded_file = null;
55 static private Document loaded_file_document = null;
56 static private boolean loaded_file_changed = false;
57
58
/**
 * Creates the File object representing one metadata.xml file.
 * The file is not read or parsed here.
 *
 * @param metadata_xml_file_path the path of the metadata.xml file, passed to File(String)
 */
public MetadataXMLFile(String metadata_xml_file_path)
{
    super(metadata_xml_file_path);
}
63
64
65 public void addMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
66 {
67 // If this metadata.xml file isn't the one currently loaded, load it now
68 if (loaded_file != this) {
69 // First we must save out the currently loaded file
70 saveLoadedFile();
71
72 // Parse the metadata.xml file
73 Document document = XMLTools.parseXMLFile(this);
74 if (document == null) {
75 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
76 return;
77 }
78
79 loaded_file = this;
80 loaded_file_document = document;
81 }
82
83 // Determine the file's path relative to the location of the metadata.xml file
84 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
85 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
86 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
87 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
88 }
89
90 // Form a regular expression that specifies the scope of the metadata
91 String file_path_regexp;
92 if (file_relative_path.equals("")) {
93 // Special case for matching all files in the directory
94 file_path_regexp = DIRECTORY_FILENAME;
95 }
96 else {
97 // Convert the file path into a regular expression that will match it
98 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
99 }
100
101 // Find the appropriate FileSet element for this file
102 Element appropriate_fileset_element = null;
103
104 // Read all the FileSet elements in the file
105 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
106 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
107 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
108
109 // Check the FileName elements of the FileSet to see if we have a match
110 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
111 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
112 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
113 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
114
115 // Only exact matches can be extended with new metadata
116 if (current_filename_element_value.equals(file_path_regexp)) {
117 appropriate_fileset_element = current_fileset_element;
118 break;
119 }
120 }
121 }
122
123 // If no appropriate FileSet element exists create a new one for this file
124 if (appropriate_fileset_element == null) {
125 DebugStream.println("Creating new FileSet element for file since none exists...");
126 appropriate_fileset_element = loaded_file_document.createElement(FILESET_ELEMENT);
127
128 Element new_filename_element = loaded_file_document.createElement(FILENAME_ELEMENT);
129 new_filename_element.appendChild(loaded_file_document.createTextNode(file_path_regexp));
130 appropriate_fileset_element.appendChild(new_filename_element);
131
132 Element new_description_element = loaded_file_document.createElement(DESCRIPTION_ELEMENT);
133 appropriate_fileset_element.appendChild(new_description_element);
134
135 // add the fileset element for .* at the top: especially important for
136 // non-accumulating (and override mode) meta. Other type fileset elements can be appended
137 if(file_path_regexp.equals(DIRECTORY_FILENAME)) {
138 loaded_file_document.getDocumentElement().insertBefore(appropriate_fileset_element,
139 loaded_file_document.getDocumentElement().getFirstChild());
140 } else {
141 loaded_file_document.getDocumentElement().appendChild(appropriate_fileset_element);
142 }
143 }
144
145 // Add each of the metadata values to the FileSet's Description element
146 Element description_element = (Element) appropriate_fileset_element.getElementsByTagName(DESCRIPTION_ELEMENT).item(0);
147 for (int i = 0; i < metadata_values.size(); i++) {
148 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
149 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
150
151 // Remove any characters that are invalid in XML
152 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
153
154 // Square brackets need to be escaped because they are a special character in Greenstone
155 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
156 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
157
158 // the gs.filenameEncoding metadata is unique in that, when added, removed or
159 // changed, it must be applied on the file(name) whose metadata has been adjusted
160 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
161 metadata_value_string = processFilenameEncoding(file_path_regexp,
162 file_node, metadata_value_string, false);
163 // true only if removing meta
164 }
165
166 // Check if this piece of metadata has already been assigned to this FileSet element
167 boolean metadata_already_assigned = false;
168 NodeList metadata_elements_nodelist = description_element.getElementsByTagName(METADATA_ELEMENT);
169 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
170 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
171
172 // Check if the metadata element name matches
173 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
174 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
175 // if the metadata must not accumulate, then edit the current value
176 if (!metadata_value.isAccumulatingMetadata()) {
177 XMLTools.setNodeText(current_metadata_element, metadata_value_string);
178 metadata_already_assigned = true;
179 break;
180 }
181 // Check if the metadata element value matches
182 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
183 if (current_metadata_value_string.equals(metadata_value_string)) {
184 // Metadata already assigned
185 metadata_already_assigned = true;
186 break;
187 }
188 }
189 }
190
191 // If the piece of metadata hasn't already been assigned, add it now
192 if (!metadata_already_assigned) {
193 // Create a new Metadata element to record this metadata
194 Element new_metadata_element = loaded_file_document.createElement(METADATA_ELEMENT);
195 new_metadata_element.setAttribute("name", metadata_value.getMetadataElement().getFullName());
196 new_metadata_element.setAttribute("mode", (metadata_value.isAccumulatingMetadata() ? "accumulate" : "override"));
197 new_metadata_element.appendChild(loaded_file_document.createTextNode(metadata_value_string));
198
199 // Accumulating metadata: add at the end
200 if (metadata_value.isAccumulatingMetadata()) {
201 description_element.appendChild(new_metadata_element);
202 }
203 // Override metadata: add at the start (so it overrides inherited metadata without affecting other assigned metadata)
204 else {
205 description_element.insertBefore(new_metadata_element, description_element.getFirstChild());
206 }
207 }
208 }
209
210 // Remember that we've changed the file so it gets saved when a new one is loaded
211 loaded_file_changed = true;
212 }
213
214
215 public ArrayList getMetadataAssignedToFile(File file, boolean fileEncodingOnly)
216 {
217 // If this metadata.xml file isn't the one currently loaded, load it now
218 if (loaded_file != this) {
219 // First we must save out the currently loaded file
220 saveLoadedFile();
221
222 // Parse the metadata.xml file
223 Document document = XMLTools.parseXMLFile(this);
224 if (document == null) {
225 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
226 return new ArrayList();
227 }
228
229 loaded_file = this;
230 loaded_file_document = document;
231 }
232
233 // Determine the file's path relative to the location of the metadata.xml file
234 String file_relative_path = FilenameEncoding.fileToURLEncoding(file);
235 File metadata_xml_file_directory = getParentFile();
236 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(metadata_xml_file_directory);
237 file_relative_path = file_relative_path.substring(metadata_xml_file_directory_path.length());
238
239 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
240 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
241 }
242
243 // Build up a list of metadata assigned to this file
244 ArrayList metadata_values = new ArrayList();
245
246 // Read all the FileSet elements in the file
247 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
248 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
249 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
250 boolean current_fileset_matches = false;
251 boolean is_one_file_only_metadata = true;
252 File folder_metadata_inherited_from = null;
253
254 // Check the FileName elements of the FileSet to see if we have a match
255 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
256 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
257 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
258 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
259
260 // Does this fileset specify metadata for one file only?
261 is_one_file_only_metadata = true;
262 if (current_filename_element_value.indexOf("*") != -1 && !current_filename_element_value.equals(DIRECTORY_FILENAME)) {
263 // No, it specifies metadata for multiple files (but not all the files in the directory)
264 is_one_file_only_metadata = false;
265 }
266
267 // This fileset specifies metadata for the file
268 if (file_relative_path.matches(current_filename_element_value)) {
269 current_fileset_matches = true;
270 if (!file_relative_path.equals("") && current_filename_element_value.equals(DIRECTORY_FILENAME)) {
271 folder_metadata_inherited_from = metadata_xml_file_directory;
272 }
273 break;
274 }
275
276 // This fileset specifies metadata for the folder the file is in
277 if (file_relative_path.startsWith(current_filename_element_value + FilenameEncoding.URL_FILE_SEPARATOR)) {
278 current_fileset_matches = true;
279 folder_metadata_inherited_from = new File(metadata_xml_file_directory, current_filename_element_value);
280 break;
281 }
282 }
283
284 // The FileSet doesn't apply, so move onto the next one
285 if (current_fileset_matches == false) {
286 continue;
287 }
288
289 // Read all the Metadata elements in the fileset
290 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
291 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
292 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
293 String metadata_element_name_full = current_metadata_element.getAttribute("name");
294 // if we're only looking for fileEncoding metadata and this isn't it, skip to the next
295 if(fileEncodingOnly && !metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
296 continue;
297 }
298 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
299
300 // Ignore legacy crap
301 if (metadata_set_namespace.equals("hidden")) {
302 continue;
303 }
304
305 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
306 if (metadata_set == null) {
307 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
308 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
309 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
310 // Skip this element if we still don't have a loaded element for it
311 continue;
312 }
313
314 metadata_element_name_full = target_metadata_element_name_full;
315 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
316 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
317 }
318
319 MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(metadata_element_name_full);
320
321 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
322 // If the element doesn't exist in the metadata set, we're not interested
323 //Shaoqun modified. It needs to be added to metadata_set because the user might disable skim file
324 if (metadata_element == null) {
325 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
326 // continue;
327 }
328
329 // Square brackets need to be escaped because they are a special character in Greenstone
330 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
331 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
332 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
333
334 MetadataValueTreeNode metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
335
336 // If there is no metadata value tree node for this value, create it
337 if (metadata_value_tree_node == null) {
338 DebugStream.println("Note: No value tree node for metadata value \"" + metadata_value_string + "\"");
339 metadata_element.addMetadataValue(metadata_value_string);
340 metadata_value_tree_node = metadata_element.getMetadataValueTreeNode(metadata_value_string);
341 }
342
343 MetadataValue metadata_value = new MetadataValue(metadata_element, metadata_value_tree_node);
344 metadata_value.inheritsMetadataFromFolder(folder_metadata_inherited_from);
345 metadata_value.setIsOneFileOnlyMetadata(is_one_file_only_metadata);
346
347 // Is this accumulating metadata?
348 if (current_metadata_element.getAttribute("mode").equals("accumulate")) {
349 metadata_value.setIsAccumulatingMetadata(true);
350 }
351
352 // Add the new metadata value to the list
353 metadata_values.add(metadata_value);
354 }
355 }
356
357 return metadata_values;
358 }
359
360
361 public void removeMetadata(CollectionTreeNode file_node, ArrayList metadata_values)
362 {
363 // If this metadata.xml file isn't the one currently loaded, load it now
364 if (loaded_file != this) {
365 // First we must save out the currently loaded file
366 saveLoadedFile();
367
368 // Parse the metadata.xml file
369 Document document = XMLTools.parseXMLFile(this);
370 if (document == null) {
371 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
372 return;
373 }
374
375 loaded_file = this;
376 loaded_file_document = document;
377 }
378
379 // Determine the file's path relative to the location of the metadata.xml file
380 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
381 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
382 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
383 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
384 }
385
386 // Form a regular expression that specifies the scope of the metadata
387 String file_path_regexp;
388 if (file_relative_path.equals("")) {
389 // Special case for matching all files in the directory
390 file_path_regexp = DIRECTORY_FILENAME;
391 }
392 else {
393 // Convert the file path into a regular expression that will match it
394 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
395 }
396
397 // Find the appropriate FileSet element for this file
398 Element appropriate_fileset_element = null;
399
400 // Read all the FileSet elements in the file
401 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
402 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
403 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
404
405 // Check the FileName elements of the FileSet to see if we have a match
406 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
407 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
408 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
409 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
410
411 // Only exact matches can be extended with new metadata
412 if (current_filename_element_value.equals(file_path_regexp)) {
413 appropriate_fileset_element = current_fileset_element;
414 break;
415 }
416 }
417 }
418
419 // If no appropriate FileSet element exists the metadata isn't assigned in this metadata.xml file
420 if (appropriate_fileset_element == null) {
421 DebugStream.println("Note: No appropriate FileSet element found when removing metadata from " + this);
422 return;
423 }
424
425 // Remove each of the metadata values from the FileSet's Description element
426 for (int i = 0; i < metadata_values.size(); i++) {
427 MetadataValue metadata_value = (MetadataValue) metadata_values.get(i);
428
429 // Remove any characters that are invalid in XML
430 String metadata_value_string = XMLTools.removeInvalidCharacters(metadata_value.getFullValue());
431
432 // Square brackets need to be escaped because they are a special character in Greenstone
433 metadata_value_string = metadata_value_string.replaceAll("\\[", "&#091;");
434 metadata_value_string = metadata_value_string.replaceAll("\\]", "&#093;");
435
436 // Find the Metadata element to delete from the fileset
437 String metadata_element_name_full = metadata_value.getMetadataElement().getFullName();
438 NodeList metadata_elements_nodelist = appropriate_fileset_element.getElementsByTagName(METADATA_ELEMENT);
439 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
440 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
441
442 // Check the metadata element name matches
443 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
444 if (current_metadata_element_name_full.equals(metadata_element_name_full)) {
445 // Check the metadata element value matches
446 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
447 if (current_metadata_value_string.equals(metadata_value_string)) {
448
449 // Remove this Metadata element
450 current_metadata_element.getParentNode().removeChild(current_metadata_element);
451
452 // the gs.filenameEncoding metadata is unique in that, when added, removed or
453 // changed, it must be applied on the file(name) whose metadata has been adjusted
454 if(current_metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
455
456 // metadata_value_string will hereafter be the inherited gs.FilenameEncoding
457 // metadata (if any), now that the value at this level has been removed
458 metadata_value_string = processFilenameEncoding(file_path_regexp,
459 file_node, "", true); // true only if *removing* this meta
460 }
461
462 // If there are no Metadata elements left now, remove the (empty) FileSet element
463 if (metadata_elements_nodelist.getLength() == 0) {
464 appropriate_fileset_element.getParentNode().removeChild(appropriate_fileset_element);
465 }
466
467 break;
468 }
469 }
470 }
471 }
472
473 // Remember that we've changed the file so it gets saved when a new one is loaded
474 loaded_file_changed = true;
475 }
476
477
478 public void replaceMetadata(CollectionTreeNode file_node, MetadataValue old_metadata_value, MetadataValue new_metadata_value)
479 {
480 // If this metadata.xml file isn't the one currently loaded, load it now
481 if (loaded_file != this) {
482 // First we must save out the currently loaded file
483 saveLoadedFile();
484
485 // Parse the metadata.xml file
486 Document document = XMLTools.parseXMLFile(this);
487 if (document == null) {
488 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
489 return;
490 }
491
492 loaded_file = this;
493 loaded_file_document = document;
494 }
495
496 // Determine the file's path relative to the location of the metadata.xml file
497 String metadata_xml_file_directory_path = FilenameEncoding.fileToURLEncoding(getParentFile());
498 String file_relative_path = file_node.getURLEncodedFilePath().substring(metadata_xml_file_directory_path.length());
499 if (file_relative_path.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
500 file_relative_path = file_relative_path.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
501 }
502
503 // Form a regular expression that specifies the scope of the metadata
504 String file_path_regexp;
505 if (file_relative_path.equals("")) {
506 // Special case for matching all files in the directory
507 file_path_regexp = DIRECTORY_FILENAME;
508 }
509 else {
510 // Convert the file path into a regular expression that will match it
511 file_path_regexp = MetadataTools.getRegularExpressionThatMatchesFilePath(file_relative_path);
512 }
513
514 // Remove any characters that are invalid in XML
515 String old_metadata_value_string = XMLTools.removeInvalidCharacters(old_metadata_value.getFullValue());
516 String new_metadata_value_string = XMLTools.removeInvalidCharacters(new_metadata_value.getFullValue());
517
518 // Square brackets need to be escaped because they are a special character in Greenstone
519 old_metadata_value_string = old_metadata_value_string.replaceAll("\\[", "&#091;");
520 old_metadata_value_string = old_metadata_value_string.replaceAll("\\]", "&#093;");
521 new_metadata_value_string = new_metadata_value_string.replaceAll("\\[", "&#091;");
522 new_metadata_value_string = new_metadata_value_string.replaceAll("\\]", "&#093;");
523
524 // Read all the FileSet elements in the file
525 NodeList fileset_elements_nodelist = loaded_file_document.getElementsByTagName(FILESET_ELEMENT);
526 for (int i = 0; i < fileset_elements_nodelist.getLength(); i++) {
527 Element current_fileset_element = (Element) fileset_elements_nodelist.item(i);
528 boolean current_fileset_matches = false;
529
530 // Check the FileName elements of the FileSet to see if we have a match
531 NodeList filename_elements_nodelist = current_fileset_element.getElementsByTagName(FILENAME_ELEMENT);
532 for (int j = 0; j < filename_elements_nodelist.getLength(); j++) {
533 Element current_filename_element = (Element) filename_elements_nodelist.item(j);
534 String current_filename_element_value = XMLTools.getElementTextValue(current_filename_element);
535
536 // Only exact matches can be edited
537 if (current_filename_element_value.equals(file_path_regexp)) {
538 current_fileset_matches = true;
539 break;
540 }
541 }
542
543 // The FileSet doesn't apply, so move onto the next one
544 if (current_fileset_matches == false) {
545 continue;
546 }
547
548 // Each metadata value is only allowed to be assigned once
549 boolean new_metadata_value_already_exists = false;
550 Element metadata_element_to_edit = null;
551
552 // Find the Metadata element to replace in the fileset
553 String metadata_element_name_full = old_metadata_value.getMetadataElement().getFullName();
554 NodeList metadata_elements_nodelist = current_fileset_element.getElementsByTagName(METADATA_ELEMENT);
555 for (int k = 0; k < metadata_elements_nodelist.getLength(); k++) {
556 Element current_metadata_element = (Element) metadata_elements_nodelist.item(k);
557
558 // Check the metadata element name matches
559 String current_metadata_element_name_full = current_metadata_element.getAttribute("name");
560 if (!current_metadata_element_name_full.equals(metadata_element_name_full)) {
561 continue;
562 }
563
564 // Check the new metadata value doesn't already exist
565 String current_metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
566 if (current_metadata_value_string.equals(new_metadata_value_string)) {
567 new_metadata_value_already_exists = true;
568 }
569
570 // Check the metadata element value matches
571 if (current_metadata_value_string.equals(old_metadata_value_string)) {
572 metadata_element_to_edit = current_metadata_element;
573 }
574 }
575
576 // If the new metadata value already existed, remove the original value
577 if (new_metadata_value_already_exists) {
578 if(metadata_element_to_edit != null) { //?????????
579 metadata_element_to_edit.getParentNode().removeChild(metadata_element_to_edit);
580 } else {
581 System.err.println("ERROR MetadataXMLFile: metadata_element_to_edit is null");
582 }
583 }
584 // Otherwise replace the old value with the new value
585 // Ensure metadata_element_to_edit isn't null (may occur when multiple files are selected)
586 else if (metadata_element_to_edit != null) {
587
588 // the gs.filenameEncoding metadata is unique in that, when added, removed or
589 // changed, it must be applied on the file(name) whose metadata has been adjusted
590 if(metadata_element_name_full.equals(FILENAME_ENCODING_METADATA)) {
591 new_metadata_value_string = processFilenameEncoding(file_path_regexp, file_node, new_metadata_value_string, false);
592 // true only if removing meta
593 }
594 XMLTools.setElementTextValue(metadata_element_to_edit, new_metadata_value_string);
595 }
596 }
597
598 // Remember that we've changed the file so it gets saved when a new one is loaded
599 loaded_file_changed = true;
600 }
601
602
603 static public void saveLoadedFile()
604 {
605 // If we have a file loaded into memory and it has been modified, save it now
606 if (loaded_file != null && loaded_file_changed == true) {
607 XMLTools.writeXMLFile(loaded_file, loaded_file_document);
608 loaded_file_changed = false;
609 }
610 }
611
612
613 /**
614 * Every metadata.xml file must be skimmed when a collection is opened, for three very important reasons:
615 * - To handle any non-namespaced metadata in the metadata.xml files (this is mapped and the files rewritten)
616 * - To get a complete list of the metadata elements in the collection (used in Design and Format panes)
617 * - To build complete and accurate metadata value trees (used in the Enrich pane)
618 */
619 public void skimFile()
620 {
621 boolean file_changed = false;
622
623 // Parse the metadata.xml file
624 DebugStream.println("Skimming metadata.xml file " + this + "...");
625
626 Document document = XMLTools.parseXMLFile(this);
627 if (document == null) {
628 System.err.println("Error: Could not parse metadata.xml file " + getAbsolutePath());
629 return;
630 }
631
632 // Read all the Metadata elements in the file
633 HashMap target_metadata_element_name_attrs_cache = new HashMap();
634 NodeList metadata_elements_nodelist = document.getElementsByTagName(METADATA_ELEMENT);
635 for (int i = 0; i < metadata_elements_nodelist.getLength(); i++) {
636 Element current_metadata_element = (Element) metadata_elements_nodelist.item(i);
637 String metadata_element_name_full = current_metadata_element.getAttribute("name");
638 String metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
639
640 // Ignore legacy crap
641 if (metadata_set_namespace.equals("hidden")) {
642 continue;
643 }
644
645 MetadataSet metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
646 if (metadata_set == null) {
647 // The metadata set isn't loaded, so give the option of mapping the element into a loaded set
648 String target_metadata_element_name_full = MetadataSetManager.mapUnloadedMetadataElement(metadata_element_name_full);
649 if (target_metadata_element_name_full == null || target_metadata_element_name_full.equals("")) {
650 // Skip this element if we still don't have a loaded element for it
651 continue;
652 }
653
654 // Update the metadata.xml file to have the new (namespaced) element name
655 // Instead of using current_metadata_element.setAttribute("name", target_metadata_element_name_full)
656 // we create an Attr object for each target metadata element name, and cache them
657 // This makes a *huge* difference (namespacing a metadata.xml file with 45000 metadata entries now
658 // takes 45 seconds instead of 30 minutes!) -- why is setting the value of a Node so slow?
659 Attr target_metadata_element_name_attr = (Attr) target_metadata_element_name_attrs_cache.get(target_metadata_element_name_full);
660 if (target_metadata_element_name_attr == null) {
661 target_metadata_element_name_attr = document.createAttribute("name");
662 target_metadata_element_name_attr.setValue(target_metadata_element_name_full);
663 target_metadata_element_name_attrs_cache.put(target_metadata_element_name_full, target_metadata_element_name_attr);
664 }
665
666 // Remove the old name attribute and add the new (namespaced) one
667 current_metadata_element.removeAttribute("name");
668 current_metadata_element.setAttributeNode((Attr) target_metadata_element_name_attr.cloneNode(false));
669 file_changed = true;
670
671 metadata_element_name_full = target_metadata_element_name_full;
672 metadata_set_namespace = MetadataTools.getMetadataSetNamespace(metadata_element_name_full);
673 metadata_set = MetadataSetManager.getMetadataSet(metadata_set_namespace);
674 }
675
676 String metadata_element_name = MetadataTools.getMetadataElementName(metadata_element_name_full);
677 MetadataElement metadata_element = metadata_set.getMetadataElementWithName(metadata_element_name);
678
679 // If the element doesn't exist in the metadata set, add it
680 if (metadata_element == null) {
681 metadata_element = metadata_set.addMetadataElementForThisSession(metadata_element_name);
682 }
683
684 // Square brackets need to be escaped because they are a special character in Greenstone
685 String metadata_value_string = XMLTools.getElementTextValue(current_metadata_element);
686 metadata_value_string = metadata_value_string.replaceAll("&#091;", "[");
687 metadata_value_string = metadata_value_string.replaceAll("&#093;", "]");
688
689 metadata_element.addMetadataValue(metadata_value_string);
690 }
691
692 // Rewrite the metadata.xml file if it has changed
693 if (file_changed) {
694 XMLTools.writeXMLFile(this, document);
695 }
696 }
697
698 /**
699 * The gs.filenameEncoding metadata is unique in that, when added, removed or
700 * replaced, it must be applied on the file(name) whose metadata has been
701 * adjusted.
702 * This method handles all that, given the regular expression or filepath name
703 * to match on (.* matches subdirectories), the affected fileNode, the new
704 * encoding value and whether a new encoding value has been added/an existing
705 * one has been replaced or whether the encoding metadata has been removed.
706 * The new adjusted value for the encoding metadata is returned.
707 *
708 * MetadataXMLFileManager maintains a hashmap of (URL-encoded filepaths, encoding)
709 * to allow fast access to previously assigned gs.filenameEncoding metadata (if
710 * any) for each file. This hashmap also needs to be updated, but this update
711 * is complicated by the fact that it concerns regular expressions that could
712 * affect multiple filenames.
713 */
714 public String processFilenameEncoding(String file_path_regexp, CollectionTreeNode file_node,
715 String encoding_metadata_value, boolean removingMetadata)
716 {
717 if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
718 return encoding_metadata_value;
719 }
720
721 // Work out this filenode's new encoding and apply it:
722
723 if(removingMetadata) { // encoding_metadata_value = ""
724 // gs.filenameEncoding metadata being removed, work out
725 // any inherited metadata to replace it with in the meta-table
726 encoding_metadata_value = FilenameEncoding.getInheritedFilenameEncoding(
727 file_node.getURLEncodedFilePath(), file_node.getFile());
728 // should be canonical encoding already
729 }
730 else if(!encoding_metadata_value.equals("")) {
731 // if adding or replacing filename encoding,
732 // get the canonical encoding name for this alias
733 encoding_metadata_value = FilenameEncoding.canonicalEncodingName(encoding_metadata_value);
734 }
735 // Reencode the display of this filenode only as any affected
736 // childnodes will be reencoded on FileNode.refreshDescendantEncodings()
737 file_node.reencodeDisplayName(encoding_metadata_value);
738
739
740 // Whether removing or adding/replacing the file's gs.filename encoding meta,
741 // store this in the file-to-encoding map for fast access, since the map stores
742 // empty string values when no meta has been assigned at this file level.
743 // In the case of removingMetadata, the value stored will be the fallback value
744
745 String urlpath = file_node.getURLEncodedFilePath();
746 if(removingMetadata) {
747 // remove it from the map instead of inserting "", so that when folders in the collectiontree
748 // are being deleted or shifted, the removemetada (and addmetadata) calls that get fired
749 // for each affected filenodes does not cause the undesirable effect of multiple "" to be
750 // entered into the filename-to-encoding map for filepaths that no longer exist .
751 FilenameEncoding.map.remove(urlpath);
752 } else { // for adding and replacing, put the encoding into the map (also replaces any existing encoding for it)
753 FilenameEncoding.map.put(urlpath, encoding_metadata_value);
754 }
755
756 // If new folder-level metadata (or metadata for a set of files fitting a pattern) has been
757 // assigned, the file_to_encodings map will be cleared for all descendant folders and files,
758 // so that these can be re-calculated upon refreshing the visible parts of the CollectionTree.
759 // Mark the state as requiring a refresh of the CollectionTree.
760 // This next step also serves to prevent the MetadataValueTableModel from trying to update
761 // itself while a refresh (involving re-encoding of filenames of visible nodes) is in progress.
762 FilenameEncoding.setRefreshRequired(true);
763
764 return encoding_metadata_value;
765 }
766}
Note: See TracBrowser for help on using the repository browser.