/**
 *#########################################################################
 *
 * A component of the Gatherer application, part of the Greenstone digital
 * library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * Author: John Thompson, Greenstone Digital Library, University of Waikato
 *
 * Copyright (C) 1999 New Zealand Digital Library Project
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *########################################################################
 */
27 | package org.greenstone.gatherer.msm;
|
---|
28 |
|
---|
29 | import java.io.*;
|
---|
30 | import java.util.*;
|
---|
31 | import org.greenstone.gatherer.DebugStream;
|
---|
32 | import org.greenstone.gatherer.Gatherer;
|
---|
33 | import org.greenstone.gatherer.msm.MetadataXMLFileManager;
|
---|
34 | import org.greenstone.gatherer.msm.MSMUtils;
|
---|
35 | import org.greenstone.gatherer.util.Codec;
|
---|
36 | import org.greenstone.gatherer.util.HashMap3D;
|
---|
37 | import org.greenstone.gatherer.util.StaticStrings;
|
---|
38 | import org.greenstone.gatherer.util.Utility;
|
---|
39 | import org.greenstone.gatherer.valuetree.GValueModel;
|
---|
40 | import org.greenstone.gatherer.valuetree.GValueNode;
|
---|
41 | import org.w3c.dom.*;
|
---|
42 |
|
---|
43 | /** This class wraps around a DOM Document providing methods for accessing the data within. In this case the DOM represents a Greenstone Directory metadata file. It provides the necessary functionality to create a new metadata.xml file.
|
---|
44 | * @author John Thompson, Greenstone Digital Library, University of Waikato
|
---|
45 | * @version 2.3b
|
---|
46 | */
|
---|
47 | public class MetadataXMLFile {
|
---|
48 | /** Record if the document this object is based on is up to date. */
|
---|
49 | private boolean up_to_date = true;
|
---|
50 | /** The document this class sources its data from. */
|
---|
51 | private Document base_document;
|
---|
52 | static final private String ACCUMULATE = "accumulate";
|
---|
53 | /** The pattern to match when searching for directory level assignments. */
|
---|
54 | static final private String DIRECTORY_FILENAME = ".*";
|
---|
55 | static final private String DESCRIPTION_ELEMENT = "Description";
|
---|
56 | static final public String FILENAME_ELEMENT = "FileName";
|
---|
57 | static final public String FILESET_ELEMENT = "FileSet";
|
---|
58 | static final private String HVALUE_ATTRIBUTE = "hvalue";
|
---|
59 | static final private String MODE_ATTRIBUTE = "mode";
|
---|
60 | static final private String OVERWRITE = "overwrite";
|
---|
61 | static final private String[] ALL_METADATA_TYPES = {StaticStrings.METADATA_ELEMENT, StaticStrings.EXTRACTED_METADATA_ELEMENT};
|
---|
62 |
|
---|
/**
 * Build a wrapper around a brand new metadata.xml document.
 * The new document is obtained by parsing the Greenstone directory metadata template.
 */
public MetadataXMLFile() {
    // A fresh document is simply a parsed copy of the template.
    base_document = Utility.parse(Utility.GREENSTONEDIRECTORYMETADATA_TEMPLATE, true);
}
68 |
|
---|
/**
 * Constructor which parses an existing metadata.xml document.
 * If parsing fails the failure is written to the debug stream and the wrapped
 * document is left unset (null), so callers should not assume a document exists.
 *
 * @param file the metadata.xml file to parse
 */
public MetadataXMLFile(File file) {
    try {
        this.base_document = Utility.parse(file.getAbsolutePath(), false);
    }
    catch (Exception error) {
        // Poorly formed, or completely invalid metadata.xml file! Still construct the
        // object as before, but leave a trace rather than hiding the failure.
        DebugStream.printStackTrace(error);
    }
}
78 |
|
---|
/**
 * Constructor which wraps around an existing metadata.xml document.
 * The document is stored as given; it is not copied or checked here.
 *
 * @param base_document the DOM document to wrap
 */
public MetadataXMLFile(Document base_document) {
    this.base_document = base_document;
}
83 |
|
---|
84 | /** Add this metadata to the named file. There is one tricky thing to consider. Whenever a metadata entry is added it is taken to be accumulating except if it is the first added, in which case it overwrites! Actually this gets worse, as we could have been told to append this metadata to a document which already inherits metadata. Thus we need a new argument to determine whether this add was triggered by an append or a replace. */
|
---|
85 | public void addMetadata(String filename, Metadata metadata, boolean force_accumulate)
|
---|
86 | {
|
---|
87 | ///ystem.err.println("Add '" + metadata + "' to " + (filename != null ? filename : "directory."));
|
---|
88 | try {
|
---|
89 | // Retrieve the document element
|
---|
90 | Element gdm_element = base_document.getDocumentElement();
|
---|
91 |
|
---|
92 | // Find the fileset element for the filename
|
---|
93 | Element fileset_element = findFileSetElementForFile(gdm_element, filename);
|
---|
94 |
|
---|
95 | // If there is no existing fileset, then create one
|
---|
96 | if (fileset_element == null) {
|
---|
97 | fileset_element = base_document.createElement(FILESET_ELEMENT);
|
---|
98 | Element filename_element = base_document.createElement(FILENAME_ELEMENT);
|
---|
99 | Element description_element = base_document.createElement(DESCRIPTION_ELEMENT);
|
---|
100 | fileset_element.appendChild(filename_element);
|
---|
101 | fileset_element.appendChild(description_element);
|
---|
102 | Text filename_text = null;
|
---|
103 | // If the filename is null then we add a directory metadata set as gdm_element's first child
|
---|
104 | if (filename == null) {
|
---|
105 | filename_text = base_document.createTextNode(DIRECTORY_FILENAME);
|
---|
106 | if (gdm_element.hasChildNodes()) {
|
---|
107 | gdm_element.insertBefore(fileset_element, gdm_element.getFirstChild());
|
---|
108 | }
|
---|
109 | else {
|
---|
110 | gdm_element.appendChild(fileset_element);
|
---|
111 | }
|
---|
112 | }
|
---|
113 | // Otherwise we append the new fileset to gdm_element's children
|
---|
114 | else {
|
---|
115 | ///ystem.err.println("New fileset for " + filename);
|
---|
116 | filename = Codec.transform(filename, Codec.TEXT_TO_REGEXP);
|
---|
117 | ///ystem.err.println("After transform: " + filename);
|
---|
118 | filename_text = base_document.createTextNode(filename);
|
---|
119 | gdm_element.appendChild(fileset_element);
|
---|
120 | }
|
---|
121 | filename_element.appendChild(filename_text);
|
---|
122 | filename_text = null;
|
---|
123 | description_element = null;
|
---|
124 | filename_element = null;
|
---|
125 | }
|
---|
126 | // Now, finally, we can add the metadata.
|
---|
127 | Element metadata_element = null;
|
---|
128 | String name = metadata.getElement().getName();
|
---|
129 | // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
|
---|
130 | String current_metadata_type = StaticStrings.METADATA_ELEMENT;
|
---|
131 | if(name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
|
---|
132 | current_metadata_type = StaticStrings.EXTRACTED_METADATA_ELEMENT;
|
---|
133 | name = name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
|
---|
134 | }
|
---|
135 |
|
---|
136 | metadata_element = base_document.createElement(current_metadata_type);
|
---|
137 | metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, name);
|
---|
138 |
|
---|
139 | // To determine if this metadata entry should overwrite or accumulate we check if there are other entries with the same element in this fileset.
|
---|
140 | // also we are checking for the existence of exactly the same metadata cos sometimes we can be asked to add the same metadata twice.
|
---|
141 | boolean will_accumulate = false;
|
---|
142 | // only look through Metadata or XMetadata depending on which type we are trying to add
|
---|
143 | NodeList sibling_metadata_elements = fileset_element.getElementsByTagName(current_metadata_type);
|
---|
144 | for (int i=0; i<sibling_metadata_elements.getLength(); i++) {
|
---|
145 | Element sib_meta = (Element) sibling_metadata_elements.item(i);
|
---|
146 | if(name.equals(sib_meta.getAttribute(StaticStrings.NAME_ATTRIBUTE))) {
|
---|
147 | // found one with the same name - the new metadata will accumulate
|
---|
148 | will_accumulate = true;
|
---|
149 | // check for the same value
|
---|
150 | if(metadata.getAbsoluteValue().equals(MSMUtils.getValue(sib_meta))) {
|
---|
151 | return;
|
---|
152 | }
|
---|
153 |
|
---|
154 | }
|
---|
155 | sib_meta = null;
|
---|
156 | }
|
---|
157 | sibling_metadata_elements = null;
|
---|
158 |
|
---|
159 | if(will_accumulate || force_accumulate) { //mode.equals(ACCUMULATE)) {
|
---|
160 | metadata_element.setAttribute(MODE_ATTRIBUTE, ACCUMULATE);
|
---|
161 | }
|
---|
162 | // As we can't possibly store all the metadata in memory, nor can we ensure that the indexes written to file remain the same until the new time we look at this file, and to avoid having to open a rewrite every collection document whenever any value tree changes, I'm writing the value out as a full path string
|
---|
163 | // getAbsoluteValue now does return the full path
|
---|
164 | String node_value = metadata.getAbsoluteValue();
|
---|
165 | //GValueModel model = Gatherer.c_man.getCollection().msm.getValueTree(metadata.getElement());
|
---|
166 | //String node_value = null;
|
---|
167 | //if(model != null && model.isHierarchy()) {
|
---|
168 | //node_value = /odec.transform(metadata.getValueNode().getFullPath(false), /odec.TEXT_TO_DOM);
|
---|
169 | //node_value = metadata.getValueNode().getFullPath(false);
|
---|
170 | //}
|
---|
171 | //else {
|
---|
172 | //node_value = metadata.getAbsoluteValue();
|
---|
173 | // }
|
---|
174 | ///ystem.err.println("Creating node in MetadataXMLFile: '" + node_value + "'");
|
---|
175 | metadata_element.appendChild(base_document.createTextNode(node_value));
|
---|
176 | // Retrieve the first description element for this fileset (there should only be one, but I'll play it safe).
|
---|
177 | NodeList description_elements = fileset_element.getElementsByTagName("Description");
|
---|
178 | Element description_element = (Element) description_elements.item(0);
|
---|
179 | description_element.appendChild(metadata_element);
|
---|
180 | description_element = null;
|
---|
181 | metadata_element = null;
|
---|
182 | fileset_element = null;
|
---|
183 | gdm_element = null;
|
---|
184 | up_to_date = false;
|
---|
185 | }
|
---|
186 | catch (Exception error) {
|
---|
187 | DebugStream.printStackTrace(error);
|
---|
188 | }
|
---|
189 | }
|
---|
190 |
|
---|
191 | /** this is used to 'purge' the metadata - I've taken the purge code out of getMetadata and put it in to here, cos its only called from one place and we dont want to retrieve the metadata, just update it */
|
---|
192 | public void cleanUpMetadataRefs() {
|
---|
193 | //DebugStream.println("clean up metadata refs!");
|
---|
194 |
|
---|
195 | String file_relative_path = "";
|
---|
196 | try {
|
---|
197 | // Retrieve the document element.
|
---|
198 | Element directorymetadata_element = base_document.getDocumentElement();
|
---|
199 | // Iterate through all the metadata
|
---|
200 | // We have to do this for each type of metadata - do we need it for extracted metadata - will this only affect hierarchical metadata in which case only need to do normal metadata ?
|
---|
201 | for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
|
---|
202 | NodeList metadata_elements = directorymetadata_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
|
---|
203 | for(int l = 0; l < metadata_elements.getLength(); l++) {
|
---|
204 | Element metadata_element = (Element) metadata_elements.item(l);
|
---|
205 | String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
|
---|
206 | String raw_value = MSMUtils.getValue(metadata_element);
|
---|
207 | //
|
---|
208 | // ***** LEGACY SUPPORT *****
|
---|
209 | // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\'
|
---|
210 | if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
|
---|
211 | DebugStream.println("Detected Legacy Path: " + raw_value);
|
---|
212 | raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
|
---|
213 | MSMUtils.setValue(metadata_element, raw_value);
|
---|
214 | }
|
---|
215 |
|
---|
216 | ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
|
---|
217 | if (element != null) {
|
---|
218 | GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
|
---|
219 | String current_value = value.getFullPath(false);
|
---|
220 | if(!raw_value.equals(current_value)) {
|
---|
221 | // set the new value
|
---|
222 | MSMUtils.setValue(metadata_element, current_value);
|
---|
223 | }
|
---|
224 | value = null;
|
---|
225 | current_value = null;
|
---|
226 | }
|
---|
227 | element = null;
|
---|
228 | metadata_element = null;
|
---|
229 | raw_element = null;
|
---|
230 | raw_value = null;
|
---|
231 | } // for each metadata element
|
---|
232 | metadata_elements = null;
|
---|
233 | } // for each metadata type
|
---|
234 | } catch (Exception exception) {
|
---|
235 | DebugStream.printStackTrace(exception);
|
---|
236 | }
|
---|
237 | }
|
---|
238 |
|
---|
239 |
|
---|
240 | private Element findFileSetElementForFile(Element gdm_element, String filename)
|
---|
241 | {
|
---|
242 | // Iterate through the filesets looking for one that matches the given filename
|
---|
243 | NodeList fileset_elements = gdm_element.getElementsByTagName(FILESET_ELEMENT);
|
---|
244 | for (int i = 0; i < fileset_elements.getLength(); i++) {
|
---|
245 | Element fileset_element = (Element) fileset_elements.item(i);
|
---|
246 |
|
---|
247 | NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
|
---|
248 | for (int j = 0; j < filename_elements.getLength(); j++) {
|
---|
249 | Element filename_element = (Element) filename_elements.item(j);
|
---|
250 | String filename_pattern = MSMUtils.getValue(filename_element);
|
---|
251 |
|
---|
252 | // System.err.println("Checking " + filename + " against " + filename_pattern);
|
---|
253 |
|
---|
254 | // Is this a file match?
|
---|
255 | if (filename != null && filename.matches(filename_pattern) &&
|
---|
256 | !filename_pattern.equals(DIRECTORY_FILENAME)) {
|
---|
257 | // System.err.println("Existing file fileset!");
|
---|
258 | return fileset_element;
|
---|
259 | }
|
---|
260 | // Is this a folder match?
|
---|
261 | else if (filename == null && filename_pattern.equals(DIRECTORY_FILENAME)) {
|
---|
262 | // System.err.println("Existing folder fileset!");
|
---|
263 | return fileset_element;
|
---|
264 | }
|
---|
265 | }
|
---|
266 | }
|
---|
267 |
|
---|
268 | // No match found
|
---|
269 | return null;
|
---|
270 | }
|
---|
271 |
|
---|
272 |
|
---|
/**
 * Retrieve the document this class is wrapping.
 *
 * @return the wrapped DOM document itself (not a copy)
 */
public Document getDocument() {
    return base_document;
}
277 |
|
---|
278 | /** Retrieve the metadata associated with the given filename. Keep track of what metadata should be overwritten and what should be accumulated. Also make note of the source file, and remove the metadata if required. */
|
---|
279 | // !! Michael has written a much nicer version of this function !!
|
---|
280 | // Kath has cleaned up this version a bit
|
---|
281 | public ArrayList getMetadata(String filename, boolean remove, ArrayList metadatum_so_far, File file, boolean append_folder_level) {
|
---|
282 | DebugStream.println("Get metadata for " + filename);
|
---|
283 | DebugStream.println("remove = " + remove + ", metadata_so_far = " + (metadatum_so_far != null ? String.valueOf(metadatum_so_far.size()) : "null") + ", file = " + file + ", append_folder_level = " + append_folder_level);
|
---|
284 |
|
---|
285 | // Determine the file's path relative to the location of the metadata.xml file
|
---|
286 | String file_relative_path = ((filename != null) ? filename : "");
|
---|
287 | if (file_relative_path.endsWith(File.separator)) {
|
---|
288 | file_relative_path = file_relative_path.substring(0, file_relative_path.length() - File.separator.length());
|
---|
289 | }
|
---|
290 |
|
---|
291 | ArrayList metadatum = null;
|
---|
292 | ArrayList queued_for_removal = new ArrayList();
|
---|
293 | if(metadatum_so_far == null) {
|
---|
294 | metadatum = new ArrayList();
|
---|
295 | }
|
---|
296 | else {
|
---|
297 | metadatum = metadatum_so_far;
|
---|
298 | }
|
---|
299 | try {
|
---|
300 | // Retrieve the document element.
|
---|
301 | Element directorymetadata_element = base_document.getDocumentElement();
|
---|
302 | // Iterate through the filesets, checking the FileName child element against the target file's name using regular expression matching.
|
---|
303 | NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
|
---|
304 | for(int i = 0; i < fileset_elements.getLength(); i++) {
|
---|
305 | Element fileset_element = (Element) fileset_elements.item(i);
|
---|
306 | boolean fileset_matches = false;
|
---|
307 | // look through the filename elements of this and see if we have a match
|
---|
308 | NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
|
---|
309 | String filename_text = "";
|
---|
310 | for(int j = 0; j < filename_elements.getLength(); j++) {
|
---|
311 | Element filename_element = (Element) filename_elements.item(j);
|
---|
312 | filename_text = MSMUtils.getValue(filename_element);
|
---|
313 | if((filename != null && (filename.matches(filename_text) || (append_folder_level && filename.indexOf(File.separator) != -1 && filename_text.equals(filename.substring(0, filename.indexOf(File.separator)))))) || ((filename == null || append_folder_level) && filename_text.equals(DIRECTORY_FILENAME))) {
|
---|
314 | fileset_matches = true;
|
---|
315 | filename_element = null;
|
---|
316 | break;
|
---|
317 | }
|
---|
318 | filename_element = null;
|
---|
319 | }
|
---|
320 |
|
---|
321 | if (!fileset_matches) {
|
---|
322 | // go to teh next fileset
|
---|
323 | fileset_element = null;
|
---|
324 | continue;
|
---|
325 | }
|
---|
326 | // If this fileset matches add all of the metadata found in the fileset, remembering to abide by desired mode (accumulate vs. overwrite).
|
---|
327 | // We have to do this for each type of metadata
|
---|
328 | for(int z = 0; z < ALL_METADATA_TYPES.length; z++) {
|
---|
329 | NodeList metadata_elements = fileset_element.getElementsByTagName(ALL_METADATA_TYPES[z]);
|
---|
330 | for(int l = 0; l < metadata_elements.getLength(); l++) {
|
---|
331 | Element metadata_element = (Element) metadata_elements.item(l);
|
---|
332 | String raw_element = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
|
---|
333 | String mode = metadata_element.getAttribute(MODE_ATTRIBUTE);
|
---|
334 | String raw_value = MSMUtils.getValue(metadata_element);
|
---|
335 | // ***** LEGACY SUPPORT *****
|
---|
336 | // If this raw_value contains a '\' character, but no '\\', '[' or ']' characters, then replace the '\' with a '\\'
|
---|
337 | if(raw_value.indexOf(StaticStrings.ESCAPE_STR) != -1) {
|
---|
338 | DebugStream.println("Detected Legacy Path: " + raw_value);
|
---|
339 | raw_value = raw_value.replaceAll(StaticStrings.ESCAPE_PATTERN, StaticStrings.PIPE_STR);
|
---|
340 | DebugStream.println("Updated Path To: " + raw_value);
|
---|
341 | MSMUtils.setValue(metadata_element, raw_value);
|
---|
342 | }
|
---|
343 | // **************************
|
---|
344 | // Using the element string and value, retrieve a matching Metadata object from the cache
|
---|
345 | Metadata metadata = null;
|
---|
346 | // If this element has hierarchy values then we must ensure the raw value is a full path, not an index.
|
---|
347 | // Try to retrieve an already comstructed piece of metadata from file - but not if we are purging, as this will stuff up anything that is still using that metadata - such as the GTable
|
---|
348 | if(MetadataXMLFileManager.metadata_cache.contains(raw_element, raw_value)) {
|
---|
349 | ///ystem.err.println("HIT! Retrieve metadata from cache: " + raw_element + " -> " + raw_value + "\n");
|
---|
350 | metadata = (Metadata) MetadataXMLFileManager.metadata_cache.get(raw_element, raw_value);
|
---|
351 | }
|
---|
352 | else {
|
---|
353 | ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(raw_element);
|
---|
354 | if (element != null) {
|
---|
355 | GValueNode value = Metadata.getDefaultValueNode(element, raw_value);
|
---|
356 | ///ystem.err.println("Miss. Create new metadata: " + raw_element + " -> " + raw_value + "\n");
|
---|
357 | metadata = new Metadata(element, value);
|
---|
358 | MetadataXMLFileManager.metadata_cache.put(raw_element, raw_value, metadata);
|
---|
359 |
|
---|
360 | ///ystem.err.println("Added metadata to cache: " + raw_element + " -> " + raw_value + "\n");
|
---|
361 | value = null;
|
---|
362 | element = null;
|
---|
363 | }
|
---|
364 | }
|
---|
365 |
|
---|
366 | // Determine whether this metadata is file or folder level
|
---|
367 | if (metadata != null) {
|
---|
368 | // System.err.println("File relative path: " + file_relative_path + " Filename text: " + filename_text);
|
---|
369 | // Direct match to regular expression
|
---|
370 | if (file_relative_path.matches(filename_text)) {
|
---|
371 | boolean is_folder_level = filename_text.equals(".*") && !file_relative_path.equals("");
|
---|
372 | metadata.setFile(file);
|
---|
373 | metadata.setFileLevel(!is_folder_level);
|
---|
374 | }
|
---|
375 | // Indirect match to regular expression (always folder level)
|
---|
376 | else if (file_relative_path.startsWith(filename_text + File.separator)) {
|
---|
377 | metadata.setFile(new File(file, filename_text));
|
---|
378 | metadata.setFileLevel(false);
|
---|
379 | }
|
---|
380 |
|
---|
381 | // If mode is overwrite, then remove any previous values for this metadata element.
|
---|
382 | if(mode.equals("accumulate")) {
|
---|
383 | metadata.setAccumulate(true);
|
---|
384 | }
|
---|
385 | else {
|
---|
386 | metadata.setAccumulate(false);
|
---|
387 | ///ystem.err.println("Metadata overwrites: " + metadata);
|
---|
388 | for(int m = metadatum.size() - 1; m >= 0; m--) {
|
---|
389 | Metadata old_metadata = (Metadata) metadatum.get(m);
|
---|
390 | if(old_metadata.getElement().equals(metadata.getElement())) {
|
---|
391 | metadatum.remove(m);
|
---|
392 | ///ystem.err.println("Removing overridden metadata: " + old_metadata);
|
---|
393 | }
|
---|
394 | old_metadata = null;
|
---|
395 | }
|
---|
396 | }
|
---|
397 | mode = null;
|
---|
398 | // Add the completed metadata and clean up
|
---|
399 | ///ystem.err.println("Adding metadata: " + metadata);
|
---|
400 | metadatum.add(metadata);
|
---|
401 | // Having found our metadata check if the value from the xml matches the one from the gvaluenode. If not update it. This happens whenever hierarchy information is involved (indexes rapidly become obsolete).
|
---|
402 | // If remove was set, remove it. We can only remove pure file level metadata, or folder level iff we were asked for folder level.
|
---|
403 | ///atherer.println("Have we been asked to remove the metadata: " + metadata);
|
---|
404 | ///atherer.println("Given:");
|
---|
405 | ///atherer.println("\tremove = " + remove);
|
---|
406 | ///atherer.println("\tfilename = " + filename);
|
---|
407 | ///atherer.println("\tfilename_text = " + filename_text + "?");
|
---|
408 | if(remove && ((filename != null && filename.matches(filename_text) && !filename_text.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME)))) {
|
---|
409 | ///atherer.println("Yes! Queuing for Removal.");
|
---|
410 | queued_for_removal.add(metadata_element);
|
---|
411 | }
|
---|
412 | else {
|
---|
413 | ///atherer.println("No. Updating.");
|
---|
414 | String current_value = metadata.getValueNode().getFullPath(false);
|
---|
415 | ///ystem.err.println("Checking the current mdv path: " + current_value);
|
---|
416 | ///ystem.err.println("Against whats in the metadata file: " + raw_value);
|
---|
417 | if(!raw_value.equals(current_value)) {
|
---|
418 | MSMUtils.setValue(metadata_element, current_value);
|
---|
419 | }
|
---|
420 | }
|
---|
421 | }
|
---|
422 | metadata = null;
|
---|
423 | raw_value = null;
|
---|
424 | raw_element = null;
|
---|
425 | metadata_element = null;
|
---|
426 | } // for all metadata elements
|
---|
427 | metadata_elements = null;
|
---|
428 | } // for all metadata types
|
---|
429 |
|
---|
430 | // Now we remove any elements that have been queued for deletion
|
---|
431 | for(int a = 0; queued_for_removal != null && a < queued_for_removal.size(); a++) {
|
---|
432 | Element metadata_element = (Element) queued_for_removal.get(a);
|
---|
433 | Element parent = (Element) metadata_element.getParentNode();
|
---|
434 | parent.removeChild(metadata_element);
|
---|
435 |
|
---|
436 | up_to_date = false;
|
---|
437 | }
|
---|
438 | queued_for_removal.clear();
|
---|
439 |
|
---|
440 | // If the fileset no longer has any metadata remove it
|
---|
441 | NodeList metadata_elements = fileset_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
|
---|
442 | if (metadata_elements.getLength()==0) {
|
---|
443 | metadata_elements = fileset_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
|
---|
444 | if (metadata_elements.getLength()==0) {
|
---|
445 | directorymetadata_element.removeChild(fileset_element);
|
---|
446 | up_to_date = false;
|
---|
447 | }
|
---|
448 | }
|
---|
449 | metadata_elements = null;
|
---|
450 | fileset_element = null;
|
---|
451 | filename_text = null;
|
---|
452 | } // for each fileset element
|
---|
453 |
|
---|
454 | fileset_elements = null;
|
---|
455 | directorymetadata_element = null;
|
---|
456 | }
|
---|
457 | catch (Exception exception) {
|
---|
458 | DebugStream.printStackTrace(exception);
|
---|
459 | }
|
---|
460 | ///ystem.err.println("Found " + metadatum.size() + " pieces of metadata.");
|
---|
461 | queued_for_removal = null;
|
---|
462 | return metadatum;
|
---|
463 | }
|
---|
464 |
|
---|
465 | /** returns true if the document has at least one Metadata or XMetadata element */
|
---|
466 | public boolean hasMetadata() {
|
---|
467 | boolean has_meta = true;
|
---|
468 | try {
|
---|
469 | // Retrieve the document element.
|
---|
470 | Element directory_metadata_element = base_document.getDocumentElement();
|
---|
471 | NodeList metadata_nodes = directory_metadata_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
|
---|
472 | if (metadata_nodes.getLength()==0) {
|
---|
473 | // try extracted metadata
|
---|
474 | metadata_nodes = directory_metadata_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
|
---|
475 | if (metadata_nodes.getLength()==0) {
|
---|
476 | has_meta = false;
|
---|
477 | }
|
---|
478 | }
|
---|
479 | directory_metadata_element=null;
|
---|
480 | metadata_nodes=null;
|
---|
481 | }
|
---|
482 | catch (Exception error) {
|
---|
483 | DebugStream.printStackTrace(error);
|
---|
484 | }
|
---|
485 | return has_meta;
|
---|
486 | }
|
---|
487 |
|
---|
/**
 * Determine if this document has been saved recently, and thus the xml file version is up to date.
 * NOTE(review): this always returns false and never consults the up_to_date field that
 * other methods of this class maintain - confirm whether that is intentional.
 *
 * @return always false
 */
public boolean isUpToDate() {
    return false;
}
492 |
|
---|
493 | /** Determine is this is a valid Greenstone Directory Metadata file. It may of course just be some xml file with the name metadata.xml. */
|
---|
494 | public boolean isValid() {
|
---|
495 | // Just determine if the doctype is GreenstoneDirectoryMetadata and root node is called DirectoryMetadata.
|
---|
496 | String doctype_name = base_document.getDoctype().getName();
|
---|
497 | String root_name = base_document.getDocumentElement().getTagName();
|
---|
498 | return ((doctype_name.equals("GreenstoneDirectoryMetadata") && root_name.equals("GreenstoneDirectoryMetadata")) || (doctype_name.equals("DirectoryMetadata") && root_name.equals("DirectoryMetadata")));
|
---|
499 | }
|
---|
500 |
|
---|
501 | /** Remove all of the extracted metadata (XMetadata) from this document. */
|
---|
502 | public void removeExtractedMetadata() {
|
---|
503 | try {
|
---|
504 | Element document_element = base_document.getDocumentElement();
|
---|
505 | NodeList extracted_metadata_elements = document_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
|
---|
506 | document_element = null;
|
---|
507 | for(int i = extracted_metadata_elements.getLength(); i != 0; i--) {
|
---|
508 | Element extracted_metadata_element = (Element) extracted_metadata_elements.item(i - 1);
|
---|
509 | String element_name = extracted_metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
|
---|
510 | ElementWrapper element = Gatherer.c_man.getCollection().msm.getElement(element_name);
|
---|
511 | if(element != null) {
|
---|
512 | element.dec();
|
---|
513 | }
|
---|
514 | element = null;
|
---|
515 | ///ystem.err.println("Removing extracted metadata: " + element_name + "=" + MSMUtils.getValue(extracted_metadata_element));
|
---|
516 | element_name = null;
|
---|
517 | Node parent_node = extracted_metadata_element.getParentNode();
|
---|
518 | parent_node.removeChild(extracted_metadata_element);
|
---|
519 | parent_node = null;
|
---|
520 | extracted_metadata_element = null;
|
---|
521 | }
|
---|
522 | extracted_metadata_elements = null;
|
---|
523 | up_to_date = false;
|
---|
524 | }
|
---|
525 | catch(Exception exception) {
|
---|
526 | DebugStream.println("Exception in MetadataXMLFile.removeExtractedMetadata() - unexpected");
|
---|
527 | DebugStream.printStackTrace(exception);
|
---|
528 | }
|
---|
529 | }
|
---|
530 |
|
---|
531 | /** Remove the given metadata from this document.If filename is null, then removes directory level metadata, otherwise just removes it from the specified file. All directory level metadata is available under the FileSet with filename '.*'. There is at least one nasty case to consider, where the first overwriting metadata entry, of several with the same element, is removed. In this case the next entry must become overwrite to ensure proper inheritance. */
|
---|
532 | public void removeMetadata(String filename, Metadata metadata) {
|
---|
533 | DebugStream.println("Remove metadata: " + metadata + "\nFrom filename: " + filename);
|
---|
534 | try {
|
---|
535 | boolean found = false;
|
---|
536 | boolean first_metadata_element_found = true;
|
---|
537 | boolean make_next_metadata_element_overwrite = false;
|
---|
538 | boolean remove_fileset = false;
|
---|
539 | // is this extracted or normal metadata?
|
---|
540 | String removing_metadata_name = metadata.getElement().getName();
|
---|
541 | // If this is extracted metadata, we use a special element name that won't be recognized by greenstone
|
---|
542 | String current_metadata_type = StaticStrings.METADATA_ELEMENT;
|
---|
543 | if(removing_metadata_name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
|
---|
544 | current_metadata_type = StaticStrings.EXTRACTED_METADATA_ELEMENT;
|
---|
545 | removing_metadata_name = removing_metadata_name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
|
---|
546 | }
|
---|
547 |
|
---|
548 | // Retrieve the document element.
|
---|
549 | Element directorymetadata_element = base_document.getDocumentElement();
|
---|
550 | // Iterate through the filesets looking for the appropriate one.
|
---|
551 | NodeList fileset_elements = directorymetadata_element.getElementsByTagName(FILESET_ELEMENT);
|
---|
552 | for(int i = 0; !found && i < fileset_elements.getLength(); i++) {
|
---|
553 | Element fileset_element = (Element) fileset_elements.item(i);
|
---|
554 | NodeList filename_elements = fileset_element.getElementsByTagName(FILENAME_ELEMENT);
|
---|
555 | for(int j = 0; !found && j < filename_elements.getLength(); j++) {
|
---|
556 | Element filename_element = (Element) filename_elements.item(j);
|
---|
557 | String filename_text = MSMUtils.getValue(filename_element);
|
---|
558 | if((filename != null && filename.matches(filename_text) && !filename.equals(DIRECTORY_FILENAME)) || (filename == null && filename_text.equals(DIRECTORY_FILENAME))) {
|
---|
559 | // Retrieve the Metadata Elements for this fileset, and iterate through them looking for the one which we are to remove.
|
---|
560 | NodeList metadata_elements = fileset_element.getElementsByTagName(current_metadata_type);
|
---|
561 | for(int l = 0; (!found || !make_next_metadata_element_overwrite) && l < metadata_elements.getLength(); l++) {
|
---|
562 | Element metadata_element = (Element) metadata_elements.item(l);
|
---|
563 | String element = metadata_element.getAttribute("name");
|
---|
564 | String value = MSMUtils.getValue(metadata_element);
|
---|
565 | // See if this is the metadata we wish to remove
|
---|
566 | if(element.equals(removing_metadata_name)) {
|
---|
567 | if(value.equals(metadata.getValueNode().getFullPath(false))) {
|
---|
568 | // Remove it
|
---|
569 | ///ystem.err.println("Remove " + element + "-" + value);
|
---|
570 | Element parent_elem = (Element)metadata_element.getParentNode();
|
---|
571 | parent_elem.removeChild(metadata_element);
|
---|
572 |
|
---|
573 | //description_element.removeChild(metadata_element);
|
---|
574 | found = true;
|
---|
575 | // If this was the first metadata with this element found, and it was set to overwrite, then we have to ensure that the next metadata with this element found (if any) is changed to be overwrite now.
|
---|
576 | if(first_metadata_element_found && !metadata.accumulates()) {
|
---|
577 | ///ystem.err.println("First of this element found!");
|
---|
578 | make_next_metadata_element_overwrite = true;
|
---|
579 | }
|
---|
580 | }
|
---|
581 | // If this was the first metadata we've found with the element of the one to be removed set first found to false.
|
---|
582 | else if(first_metadata_element_found) {
|
---|
583 | ///ystem.err.println("Found a matching element: " + element + "=" + value);
|
---|
584 | first_metadata_element_found = false;
|
---|
585 | }
|
---|
586 | // Otherwise we should make this metadata overwrite as requested.
|
---|
587 | else if(make_next_metadata_element_overwrite) {
|
---|
588 | ///ystem.err.println("Changing to overwrite: " + element + "=" + value);
|
---|
589 | metadata_element.setAttribute(MODE_ATTRIBUTE, "");
|
---|
590 | }
|
---|
591 | }
|
---|
592 | value = null;
|
---|
593 | element = null;
|
---|
594 | metadata_element = null;
|
---|
595 | } // for each metadata
|
---|
596 | metadata_elements = null;
|
---|
597 | } // if the filename matches
|
---|
598 |
|
---|
599 | if (found) {
|
---|
600 | // if we found an element and removed it, we now want to check whether the fileset is empty or not
|
---|
601 | NodeList metadata_elements = fileset_element.getElementsByTagName(StaticStrings.METADATA_ELEMENT);
|
---|
602 | if (metadata_elements.getLength() ==0) {
|
---|
603 | metadata_elements = fileset_element.getElementsByTagName(StaticStrings.EXTRACTED_METADATA_ELEMENT);
|
---|
604 | if (metadata_elements.getLength() ==0) {
|
---|
605 | // remove the fileset
|
---|
606 | directorymetadata_element.removeChild(fileset_element);
|
---|
607 | }
|
---|
608 | }
|
---|
609 | metadata_elements = null;
|
---|
610 | }
|
---|
611 | filename_text = null;
|
---|
612 | filename_element = null;
|
---|
613 | } // for each filename element
|
---|
614 | filename_elements = null;
|
---|
615 | fileset_element = null;
|
---|
616 | } // for each fileset element
|
---|
617 | fileset_elements = null;
|
---|
618 | directorymetadata_element = null;
|
---|
619 | up_to_date = false;
|
---|
620 | }
|
---|
621 | catch (Exception error) {
|
---|
622 | DebugStream.printStackTrace(error);
|
---|
623 | }
|
---|
624 | }
|
---|
625 |
|
---|
626 | /** Change the up to date flag.
|
---|
627 | * @param up_to_date true if the document on the filesystem is the same as the one in memory, false otherwise
|
---|
628 | */
|
---|
629 | public void setUpToDate(boolean up_to_date) {
|
---|
630 | this.up_to_date = up_to_date;
|
---|
631 | }
|
---|
632 | }
|
---|