source: trunk/gli/src/org/greenstone/gatherer/msm/LegacyCollectionImporter.java@ 6318

Last change on this file since 6318 was 6093, checked in by jmt12, 20 years ago

Legacy importing will no longer prompt the user for metadata element merging instructions if no metadata set has been selected

  • Property svn:keywords set to Author Date Id Revision
File size: 21.2 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.msm;
28/**************************************************************************************
29 * Written: 26/11/03
30 * Revised:
31 **************************************************************************************/
32import java.io.*;
33import java.util.*;
34import javax.swing.*;
35import org.w3c.dom.*;
36import org.greenstone.gatherer.Dictionary;
37import org.greenstone.gatherer.Gatherer;
38import org.greenstone.gatherer.cdm.Argument;
39import org.greenstone.gatherer.cdm.Classifier;
40import org.greenstone.gatherer.cdm.ClassifierManager;
41import org.greenstone.gatherer.cdm.CollectionDesignManager;
42import org.greenstone.gatherer.cdm.CommandTokenizer;
43import org.greenstone.gatherer.collection.CollectionManager;
44import org.greenstone.gatherer.file.FileManager;
45import org.greenstone.gatherer.file.FileQueue;
46import org.greenstone.gatherer.msm.ElementWrapper;
47import org.greenstone.gatherer.msm.MetadataSetManager;
48import org.greenstone.gatherer.msm.MSMProfiler;
49import org.greenstone.gatherer.msm.MSMPrompt;
50import org.greenstone.gatherer.util.Codec;
51import org.greenstone.gatherer.util.StaticStrings;
52import org.greenstone.gatherer.util.Utility;
53import org.greenstone.gatherer.valuetree.GValueModel;
54import org.greenstone.gatherer.valuetree.GValueNode;
55/** Importing metadata from legacy collections requires three distinct steps:
56 * 1. Import hfiles and build value tree for the appropriate element. This is quite tricky as the only place to determine the relation between hfiles and the elements is the collect.cfg file. Fortunately we can reuse a lot of code from SearchForMetadata.
57 * 2. Recurses through the import folder of a legacy collection, and prompts the user how to transform old skool metadata names into the new and improved namespace standard. Because of the optimization of only opening each metadata.xml file once, this method is so different from GreenstoneMetadataParser that I decided not to try to hang the two together, but instead to create this new class. GLI's becoming bloatware.
58 * 3. At the same time watch for values which come from subject hierarchies, and replace with the new, confirmably unique, value.
59 * 4. Update collection configuration as we should now be able to fix the classify commands anyway
60 * Ok so that is four. Four distinct steps. Definitely four. Couldn't possibly be more than six.
61 */
62public class LegacyCollectionImporter
63 extends HashMap {
64
65 private CollectionDesignManager cdm;
66 private File collection_folder;
67
68 /** Constructor. The legacy collection importer object itself is a HashMap of element wrappers to HFile objects. Actually it starts off as a mapping of raw_element_name_str's to Files, but we hope to remedy that as we progress through importing metadata.
69 * @param collection_folder the base File folder for the collection, mostly used for profile matching
70 * @param cdm the CollectionDesignManager so we ca determine what elements map to what hierarchy file
71 */
72 public LegacyCollectionImporter(File collection_folder, CollectionDesignManager cdm) {
73 super();
74 this.cdm = cdm;
75 this.collection_folder = collection_folder;
76 // Prefill the HashMap with raw_element_name_str mappings to Files
77 // Retrieve all of the hierarchy classifiers
78 ArrayList hierarchy_classifiers_list = cdm.classifier_manager.getHierarchyClassifiers();
79 ///ystem.err.println("When Legacy importing found: " + hierarchy_classifiers_list.size() + " hierarchies");
80 for(int i = 0; i < hierarchy_classifiers_list.size(); i++) {
81 Classifier classifier = (Classifier) hierarchy_classifiers_list.get(i);
82 // Get the element name
83 Argument metadata_name_argument = classifier.getArgument(StaticStrings.METADATA_ARGUMENT);
84 String metadata_name_str = metadata_name_argument.getValue();
85 // Because of the way of things the elements may already have had ex. put infront of them
86 if(metadata_name_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
87 metadata_name_str = metadata_name_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
88 }
89 metadata_name_argument = null;
90 // Generate the file
91 Argument hfile_filename_argument = classifier.getArgument(StaticStrings.HFILE_ARGUMENT);
92 String hfile_filename_str = hfile_filename_argument.getValue();
93 hfile_filename_argument = null;
94 File hfile_file = new File(collection_folder, StaticStrings.ETC_FOLDER + File.separator + hfile_filename_str);
95 put(metadata_name_str, hfile_file);
96 ///ystem.err.println("Queued hierarchy information for later parsing: " + metadata_name_str + "->" + hfile_file.getAbsolutePath());
97 classifier = null;
98 }
99 }
100
101 public void updateCDM() {
102
103 }
104
105 public void importMetadata() {
106 importMetadata(new File(collection_folder, StaticStrings.IMPORT_FOLDER));
107 }
108
109 /** Processes the files in the import tree, editing metadata.xml files so they contain namespaced element names.
110 * @param collection_folder the base folder of this collection, needed for storing importing profiles
111 * @param file the current file we are inspecting
112 */
113 private void importMetadata(File file) {
114 // If its a directory we recurse into it searching for metadata.xml files
115 if(file.isDirectory()) {
116 File[] files = file.listFiles();
117 for(int i = 0; files != null && i < files.length; i++) {
118 importMetadata(files[i]);
119 }
120 files = null;
121 }
122 else {
123 // We only care about metadata.xml files
124 if(file.getName().equals(StaticStrings.METADATA_XML)) {
125 boolean changed = false; // only set if we change something
126 // Parse in the metadata.xml document
127 try {
128 Document document = Utility.parse(file.getAbsolutePath(), false);
129 // Now, if we haven't currently got any metadata sets available to the collection, then this is as far as we need go. Rename the metadata.xml file and then move on the to next one. Remember we always have one set, the extracted metadata set.
130 if(Gatherer.c_man.getCollection().msm.getSets().size() <= 1) {
131 // First of all we create a backup of the current metadata.xml
132 // I'll start by trying to rename it - but we all know how successful thats been in the past
133 boolean done = false;
134 try {
135 File old_file = new File(file.getAbsolutePath());
136 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
137 // Remove any existing backup
138 if(new_file.exists()) {
139 new_file.delete();
140 }
141 old_file.renameTo(new_file);
142 // Then test if the file has been renamed
143 if(new_file.exists()) {
144 ///ystem.err.println("Rename Gooooood");
145 done = true;
146 }
147 }
148 catch(Exception exception) {
149 ///ystem.err.println("Rename Baaaaaad");
150 }
151
152 // Failing that I'll copy it.
153 if(!done) {
154 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
155 Gatherer.f_man.getQueue().copyFile(file, new_file, null);
156 if(new_file.exists()) {
157 ///ystem.err.println("Copy Goooooood");
158 }
159 else {
160 ///ystem.err.println("Copy Baaaaaaad");
161 }
162 }
163 }
164 // Otherwise me have some metadata sets, and so can carry on importing
165 else {
166 // For each metadata DOM Element
167 NodeList metadata_DOM_elements = document.getDocumentElement().getElementsByTagName(StaticStrings.METADATA_ELEMENT);
168 for(int i = 0; i < metadata_DOM_elements.getLength(); i++) {
169 Element metadata_DOM_element = (Element) metadata_DOM_elements.item(i);
170 // Extract the metadata element name, then check profiles to see if we already know how to handle this element. Once done try to retrieve the element from msm. If that fails, prompt the user for how to import this element.
171 String raw_element_name_str = metadata_DOM_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
172 // Check profiles
173 if(Gatherer.c_man.getCollection().msm.profiler.containsAction(collection_folder.getAbsolutePath(), raw_element_name_str)) {
174 String element_name_str = Gatherer.c_man.getCollection().msm.profiler.getAction(collection_folder.getAbsolutePath(), raw_element_name_str);
175 // Update the DOM
176 if(element_name_str != null) {
177 metadata_DOM_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, element_name_str);
178 // Now to ensure GLI correctly shows this element as assigned we'll increment its occurances
179 ElementWrapper element_ew = Gatherer.c_man.getCollection().msm.getElement(element_name_str, true);
180 element_ew.inc();
181 // We now check if this element has a hierarchy, and if so we update the value
182 checkForHierarchy(element_ew, metadata_DOM_element);
183 element_ew = null;
184 }
185 // If the raw name is null now, we have been instructed to ignore this metadata, so we can delete it from the metadata.xml (GDMDocument would do this anyway when it tried to save)
186 else {
187 // Retrieve the parent node
188 Node parent_DOM_node = metadata_DOM_element.getParentNode();
189 parent_DOM_node.removeChild(metadata_DOM_element);
190 parent_DOM_node = null;
191 }
192 changed = true;
193 element_name_str = null;
194 }
195 // No profile, lets hope it either matches straight away, or the user can decide what to do with it
196 else {
197 // Try to retrieve an element with this name from the msm. Only a perfect match is acceptable
198 ElementWrapper element_ew = Gatherer.c_man.getCollection().msm.getElement(raw_element_name_str, true);
199 // If an element was found, we increment the element count but thats it. Welcome to the only case where the DOM isn't changed
200 if(element_ew != null) {
201 element_ew.inc();
202 // We now check if this element has a hierarchy, and if so we update the value
203 checkForHierarchy(element_ew, metadata_DOM_element);
204 }
205 // If no match was found prompt the user as to how to proceed
206 else {
207 boolean confirmed = false;
208 boolean dialog_cancelled = false;
209 while(!confirmed) {
210 element_ew = Gatherer.c_man.getCollection().msm.prompt.selectElement(raw_element_name_str);
211 dialog_cancelled = Gatherer.c_man.getCollection().msm.prompt.wasDialogCancelled();
212 // If the user chooses something then add to profile, and process the DOM as appropriate
213 if(!dialog_cancelled) {
214 // Choosen an element to append or merge
215 if(element_ew != null) {
216 String element_name_str = element_ew.getName();
217 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_folder.getAbsolutePath(), raw_element_name_str, element_name_str);
218 metadata_DOM_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, element_name_str);
219 element_name_str = null;
220 element_ew.inc();
221
222 // We can now build the hfile properly is necessary
223 File hfile_file = (File) get(raw_element_name_str);
224 if(hfile_file != null) {
225 ///ystem.err.println("Processing the mapping for: " + raw_element_name_str);
226 // Remove the current mapping
227 remove(raw_element_name_str);
228 // Create the HFile
229 try {
230 HFile hfile = new HFile(element_ew, hfile_file);
231 // Store the mapping
232 ///ystem.err.println("Adding a hfile mapping for: " + element_ew);
233 put(element_ew, hfile);
234 // Now update the value
235 String raw_value_str = MSMUtils.getValue(metadata_DOM_element);
236 String new_value_str = hfile.getFullValue(raw_value_str);
237 MSMUtils.setValue(metadata_DOM_element, new_value_str);
238 raw_value_str = null;
239 new_value_str = null;
240 hfile = null;
241 }
242 catch (Exception exception) {
243 Gatherer.println("***** Exception in org.greenstone.gatherer.msm.LegacyCollectionImporter *****");
244 Gatherer.printStackTrace(exception);
245 }
246 }
247 element_ew = null;
248 }
249 // Choosen to ignore the element
250 else {
251 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_folder.getAbsolutePath(), raw_element_name_str, null);
252 // Retrieve the parent node
253 Node parent_DOM_node = metadata_DOM_element.getParentNode();
254 parent_DOM_node.removeChild(metadata_DOM_element);
255 parent_DOM_node = null;
256 }
257 confirmed = true;
258 changed = true;
259 }
260 // Cancel only skips the current metadata.xml file - actually I'll ignore the file on a cancel, as otherwise it will bugger up GDMManager when it starts up
261 else {
262 // Confirm the cancel, informing the user that the metadata.xml will be completely ignored if they continue
263 confirmed = (JOptionPane.showConfirmDialog(Gatherer.g_man, Dictionary.get("MSM.Legacy.Delete_Metadata_XML", file.getAbsolutePath()), Dictionary.get("General.Warning"), JOptionPane.YES_NO_OPTION) == JOptionPane.YES_OPTION);
264 }
265 }
266 }
267 }
268 raw_element_name_str = null;
269 metadata_DOM_element = null;
270 }
271 // If the DOM has saved, write it back out again
272 if(changed) {
273 // First of all we create a backup of the current metadata.xml
274 // I'll start by trying to rename it - but we all know how successful thats been in the past
275 boolean done = false;
276 try {
277 File old_file = new File(file.getAbsolutePath());
278 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
279 // Remove any existing backup
280 if(new_file.exists()) {
281 new_file.delete();
282 }
283 old_file.renameTo(new_file);
284 // Then test if the file has been renamed
285 if(new_file.exists()) {
286 ///ystem.err.println("Rename Gooooood");
287 done = true;
288 }
289 }
290 catch(Exception exception) {
291 ///ystem.err.println("Rename Baaaaaad");
292 }
293
294 // Failing that I'll copy it.
295 if(!done) {
296 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
297 Gatherer.f_man.getQueue().copyFile(file, new_file, null);
298 if(new_file.exists()) {
299 ///ystem.err.println("Copy Goooooood");
300 }
301 else {
302 ///ystem.err.println("Copy Baaaaaaad");
303 }
304 }
305 Utility.export(document, file);
306 }
307 document = null;
308 }
309 }
310 catch(Exception exception) {
311 // Dump a stack trace to debug
312 Gatherer.println("***** Exception in org.greenstone.gatherer.msm.LegacyCollectionImporter *****");
313 Gatherer.printStackTrace(exception);
314 // Then display a message complaining about this file
315 JOptionPane.showMessageDialog(Gatherer.g_man, Dictionary.get("MSM.Legacy.Corrupt_Metadata_XML", file.getAbsolutePath()), Dictionary.get("General.Error"), JOptionPane.ERROR_MESSAGE);
316 }
317 }
318 // Else you may go to the Devil's dam. Yours gifts are so good, heres none will hold you. Their love is not so great, Hortensio, but we may blow our nails together, and fast it fairly out.
319 }
320 }
321
322 private void checkForHierarchy(ElementWrapper element, Element metadata_DOM_element) {
323 // Iterator through the keys in the HashMap looking for a match to our element. Why, oh why, doesn't HashMap use element.equals(element) like its meant to?
324 HFile hfile = null;
325 for(Iterator keys = keySet().iterator(); hfile == null && keys.hasNext(); ) {
326 Object key_object = keys.next();
327 if(key_object instanceof ElementWrapper) {
328 ElementWrapper key = (ElementWrapper) key_object;
329 ///ystem.err.print("Does " + key + " equal " + element + "? ");
330 if(key.equals(element)) {
331 ///ystem.err.println("Yes!");
332 hfile = (HFile) get(key);
333 }
334 key = null;
335 }
336 key_object = null;
337 }
338 if(hfile != null) {
339 String raw_value_str = MSMUtils.getValue(metadata_DOM_element);
340 String new_value_str = hfile.getFullValue(raw_value_str);
341 MSMUtils.setValue(metadata_DOM_element, new_value_str);
342 raw_value_str = null;
343 new_value_str = null;
344 hfile = null;
345 }
346 }
347
348 /** Another basic HFile wrapper. This one expects you to provide an element when you create it, then as it is built it generates the value tree as well. Later it allows you to provide an alias and retrieve the full path string (delimited by pipes) */
349 private class HFile
350 extends HashMap {
351
352 public HFile(ElementWrapper element, File file)
353 throws Exception {
354 super();
355 HashMap index_to_entry = new HashMap();
356 // Read in the hfile, line by line, creating entry mappings
357 FileReader in_filereader = new FileReader(file);
358 BufferedReader in = new BufferedReader(in_filereader);
359 String line = null;
360 while((line = in.readLine()) != null) {
361 CommandTokenizer tokenizer = new CommandTokenizer(line);
362 String alias = tokenizer.nextToken();
363 String index = tokenizer.nextToken();
364 String value = Utility.decodeGreenstone(tokenizer.nextToken());
365 ///ystem.err.println("Read " + index + ", " + alias + ", " + value);
366 if(alias.startsWith("\"") && alias.endsWith("\"") && !alias.equals("\"\"")) {
367 alias = alias.substring(1, alias.length() - 1);
368 }
369 if(value.startsWith("\"") && value.endsWith("\"") && !value.equals("\"\"")) {
370 value = value.substring(1, value.length() - 1);
371 }
372 ///ystem.err.println("Storing:\nindex:" + index + "\nalias:" + alias + "\nvalue:" + value);
373 index_to_entry.put(index, new Entry(alias, value));
374 value = null;
375 alias = null;
376 index = null;
377 tokenizer = null;
378 }
379 in.close();
380 in_filereader.close();
381 in = null;
382 in_filereader = null;
383 // Now iterate through the HashMap creating ValueNodes and mappings as we go
384 for(Iterator keys = index_to_entry.keySet().iterator(); keys.hasNext(); ) {
385 String index = (String) keys.next();
386 Entry entry = (Entry) index_to_entry.get(index);
387 String alias = entry.alias;
388 String value = entry.value;
389 // Chop the last reference off index, as we already have it
390 if(index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
391 index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
392 // Then while theres still futher indexes left, retrieve them
393 while(index.length() > 0) {
394 // Retrieve that value (if any).
395 entry = (Entry) index_to_entry.get(index);
396 if(entry != null) {
397 // Precatenate with the current value separating with a pipe
398 value = entry.value + StaticStrings.PIPE_CHAR + value;
399 }
400 // Then trim the index down
401 if(index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
402 index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
403 }
404 else {
405 index="";
406 }
407 }
408 }
409 // Create the value node
410 GValueModel value_model = Gatherer.c_man.getCollection().msm.getValueTree(element);
411 GValueNode value_node = value_model.addValue(value);
412 // And place the mapping
413 put(alias, value_node);
414 }
415 index_to_entry.clear();
416 index_to_entry = null;
417
418 // Now we rename the old hfile
419 boolean done = false;
420 try {
421 File old_file = new File(file.getAbsolutePath());
422 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
423 // Remove any existing backup
424 if(new_file.exists()) {
425 new_file.delete();
426 }
427 old_file.renameTo(new_file);
428 // Then test if the file has been renamed
429 if(new_file.exists()) {
430 ///ystem.err.println("Rename Gooooood");
431 done = true;
432 }
433 }
434 catch(Exception exception) {
435 ///ystem.err.println("Rename Baaaaaad");
436 }
437
438 // Failing that I'll copy it.
439 if(!done) {
440 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
441 Gatherer.f_man.getQueue().copyFile(file, new_file, null);
442 if(new_file.exists()) {
443 ///ystem.err.println("Copy Goooooood");
444 }
445 else {
446 ///ystem.err.println("Copy Baaaaaaad");
447 }
448 }
449 // Right. Get rid of original
450 file.delete();
451 }
452
453 public String getFullValue(String alias) {
454 ///ystem.err.println("Searching for the value for alias: " + alias);
455 for(Iterator keys = keySet().iterator(); keys.hasNext(); ) {
456 String key = (String) keys.next();
457 if(alias.equals(key)) {
458 GValueNode value_node = (GValueNode) get(key);
459 if(value_node != null) {
460 ///ystem.err.println("Found value: " + value_node.getFullPath(false));
461 return Codec.transform(value_node.getFullPath(false), Codec.TEXT_TO_DOM);
462 }
463 else {
464 ///ystem.err.println("Error!");
465 return alias; // Can't do any better
466 }
467 }
468 }
469 ///ystem.err.println("Error!");
470 return alias;
471 }
472
473 private class Entry {
474 public String alias;
475 public String value;
476 public Entry(String alias, String value) {
477 this.alias = alias;
478 this.value = value;
479 }
480 }
481 }
482}
Note: See TracBrowser for help on using the repository browser.