source: trunk/gli/src/org/greenstone/gatherer/msm/LegacyCollectionImporter.java@ 6884

Last change on this file since 6884 was 6879, checked in by mdewsnip, 20 years ago

Renamed GDM* classes to MetadataXMLFile*, for our sanity.

  • Property svn:keywords set to Author Date Id Revision
File size: 21.1 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.msm;
28/**************************************************************************************
29 * Written: 26/11/03
30 * Revised:
31 **************************************************************************************/
32import java.io.*;
33import java.util.*;
34import javax.swing.*;
35import org.w3c.dom.*;
36import org.greenstone.gatherer.Dictionary;
37import org.greenstone.gatherer.Gatherer;
38import org.greenstone.gatherer.cdm.Argument;
39import org.greenstone.gatherer.cdm.Classifier;
40import org.greenstone.gatherer.cdm.ClassifierManager;
41import org.greenstone.gatherer.cdm.CollectionDesignManager;
42import org.greenstone.gatherer.cdm.CommandTokenizer;
43import org.greenstone.gatherer.collection.CollectionManager;
44import org.greenstone.gatherer.file.FileManager;
45import org.greenstone.gatherer.file.FileQueue;
46import org.greenstone.gatherer.msm.ElementWrapper;
47import org.greenstone.gatherer.msm.MetadataSetManager;
48import org.greenstone.gatherer.msm.MSMProfiler;
49import org.greenstone.gatherer.msm.MSMPrompt;
50import org.greenstone.gatherer.util.Codec;
51import org.greenstone.gatherer.util.StaticStrings;
52import org.greenstone.gatherer.util.Utility;
53import org.greenstone.gatherer.valuetree.GValueModel;
54import org.greenstone.gatherer.valuetree.GValueNode;
55/** Importing metadata from legacy collections requires three distinct steps:
56 * 1. Import hfiles and build value tree for the appropriate element. This is quite tricky as the only place to determine the relation between hfiles and the elements is the collect.cfg file. Fortunately we can reuse a lot of code from SearchForMetadata.
57 * 2. Recurses through the import folder of a legacy collection, and prompts the user how to transform old skool metadata names into the new and improved namespace standard. Because of the optimization of only opening each metadata.xml file once, this method is so different from GreenstoneMetadataParser that I decided not to try to hang the two together, but instead to create this new class. GLI's becoming bloatware.
58 * 3. At the same time watch for values which come from subject hierarchies, and replace with the new, confirmably unique, value.
59 * 4. Update collection configuration as we should now be able to fix the classify commands anyway
60 * Ok so that is four. Four distinct steps. Definitely four. Couldn't possibly be more than six.
61 */
62public class LegacyCollectionImporter
63 extends HashMap {
64
65 private CollectionDesignManager cdm;
66 private File collection_folder;
67
68 /** Constructor. The legacy collection importer object itself is a HashMap of element wrappers to HFile objects. Actually it starts off as a mapping of raw_element_name_str's to Files, but we hope to remedy that as we progress through importing metadata.
69 * @param collection_folder the base File folder for the collection, mostly used for profile matching
70 * @param cdm the CollectionDesignManager so we ca determine what elements map to what hierarchy file
71 */
72 public LegacyCollectionImporter(File collection_folder, CollectionDesignManager cdm) {
73 super();
74 this.cdm = cdm;
75 this.collection_folder = collection_folder;
76 // Prefill the HashMap with raw_element_name_str mappings to Files
77 // Retrieve all of the hierarchy classifiers
78 ArrayList hierarchy_classifiers_list = cdm.classifier_manager.getHierarchyClassifiers();
79 ///ystem.err.println("When Legacy importing found: " + hierarchy_classifiers_list.size() + " hierarchies");
80 for(int i = 0; i < hierarchy_classifiers_list.size(); i++) {
81 Classifier classifier = (Classifier) hierarchy_classifiers_list.get(i);
82 // Get the element name
83 Argument metadata_name_argument = classifier.getArgument(StaticStrings.METADATA_ARGUMENT);
84 String metadata_name_str = metadata_name_argument.getValue();
85 // Because of the way of things the elements may already have had ex. put infront of them
86 if(metadata_name_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
87 metadata_name_str = metadata_name_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
88 }
89 metadata_name_argument = null;
90 // Generate the file
91 Argument hfile_filename_argument = classifier.getArgument(StaticStrings.HFILE_ARGUMENT);
92 String hfile_filename_str = hfile_filename_argument.getValue();
93 hfile_filename_argument = null;
94 File hfile_file = new File(collection_folder, StaticStrings.ETC_FOLDER + File.separator + hfile_filename_str);
95 put(metadata_name_str, hfile_file);
96 ///ystem.err.println("Queued hierarchy information for later parsing: " + metadata_name_str + "->" + hfile_file.getAbsolutePath());
97 classifier = null;
98 }
99 }
100
101 public void updateCDM() {
102
103 }
104
105 public void importMetadata() {
106 importMetadata(new File(collection_folder, StaticStrings.IMPORT_FOLDER));
107 }
108
109 /** Processes the files in the import tree, editing metadata.xml files so they contain namespaced element names.
110 * @param file the current file we are inspecting
111 */
112 private void importMetadata(File file) {
113 // If its a directory we recurse into it searching for metadata.xml files
114 if(file.isDirectory()) {
115 File[] files = file.listFiles();
116 for(int i = 0; files != null && i < files.length; i++) {
117 importMetadata(files[i]);
118 }
119 files = null;
120 }
121 else {
122 // We only care about metadata.xml files
123 if(file.getName().equals(StaticStrings.METADATA_XML)) {
124 boolean changed = false; // only set if we change something
125 // Parse in the metadata.xml document
126 try {
127 Document document = Utility.parse(file.getAbsolutePath(), false);
128 // Now, if we haven't currently got any metadata sets available to the collection, then this is as far as we need go. Rename the metadata.xml file and then move on the to next one. Remember we always have one set, the extracted metadata set.
129 if(Gatherer.c_man.getCollection().msm.getSets().size() <= 1) {
130 // First of all we create a backup of the current metadata.xml
131 // I'll start by trying to rename it - but we all know how successful thats been in the past
132 boolean done = false;
133 try {
134 File old_file = new File(file.getAbsolutePath());
135 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
136 // Remove any existing backup
137 if(new_file.exists()) {
138 new_file.delete();
139 }
140 old_file.renameTo(new_file);
141 // Then test if the file has been renamed
142 if(new_file.exists()) {
143 ///ystem.err.println("Rename Gooooood");
144 done = true;
145 }
146 }
147 catch(Exception exception) {
148 ///ystem.err.println("Rename Baaaaaad");
149 }
150
151 // Failing that I'll copy it.
152 if(!done) {
153 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
154 Gatherer.f_man.getQueue().copyFile(file, new_file, null);
155 if(new_file.exists()) {
156 ///ystem.err.println("Copy Goooooood");
157 }
158 else {
159 ///ystem.err.println("Copy Baaaaaaad");
160 }
161 }
162 }
163 // Otherwise me have some metadata sets, and so can carry on importing
164 else {
165 // For each metadata DOM Element
166 NodeList metadata_DOM_elements = document.getDocumentElement().getElementsByTagName(StaticStrings.METADATA_ELEMENT);
167 for(int i = 0; i < metadata_DOM_elements.getLength(); i++) {
168 Element metadata_DOM_element = (Element) metadata_DOM_elements.item(i);
169 // Extract the metadata element name, then check profiles to see if we already know how to handle this element. Once done try to retrieve the element from msm. If that fails, prompt the user for how to import this element.
170 String raw_element_name_str = metadata_DOM_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
171 // Check profiles
172 if(Gatherer.c_man.getCollection().msm.profiler.containsAction(collection_folder.getAbsolutePath(), raw_element_name_str)) {
173 String element_name_str = Gatherer.c_man.getCollection().msm.profiler.getAction(collection_folder.getAbsolutePath(), raw_element_name_str);
174 // Update the DOM
175 if(element_name_str != null) {
176 metadata_DOM_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, element_name_str);
177 // Now to ensure GLI correctly shows this element as assigned we'll increment its occurances
178 ElementWrapper element_ew = Gatherer.c_man.getCollection().msm.getElement(element_name_str, true);
179 element_ew.inc();
180 // We now check if this element has a hierarchy, and if so we update the value
181 checkForHierarchy(element_ew, metadata_DOM_element);
182 element_ew = null;
183 }
184 // If the raw name is null now, we have been instructed to ignore this metadata, so we can delete it from the metadata.xml (MetadataXMLFile would do this anyway when it tried to save)
185 else {
186 // Retrieve the parent node
187 Node parent_DOM_node = metadata_DOM_element.getParentNode();
188 parent_DOM_node.removeChild(metadata_DOM_element);
189 parent_DOM_node = null;
190 }
191 changed = true;
192 element_name_str = null;
193 }
194 // No profile, lets hope it either matches straight away, or the user can decide what to do with it
195 else {
196 // Try to retrieve an element with this name from the msm. Only a perfect match is acceptable
197 ElementWrapper element_ew = Gatherer.c_man.getCollection().msm.getElement(raw_element_name_str, true);
198 // If an element was found, we increment the element count but thats it. Welcome to the only case where the DOM isn't changed
199 if(element_ew != null) {
200 element_ew.inc();
201 // We now check if this element has a hierarchy, and if so we update the value
202 checkForHierarchy(element_ew, metadata_DOM_element);
203 }
204 // If no match was found prompt the user as to how to proceed
205 else {
206 boolean confirmed = false;
207 boolean dialog_cancelled = false;
208 while(!confirmed) {
209 element_ew = Gatherer.c_man.getCollection().msm.prompt.selectElement(raw_element_name_str);
210 dialog_cancelled = Gatherer.c_man.getCollection().msm.prompt.wasDialogCancelled();
211 // If the user chooses something then add to profile, and process the DOM as appropriate
212 if(!dialog_cancelled) {
213 // Choosen an element to append or merge
214 if(element_ew != null) {
215 String element_name_str = element_ew.getName();
216 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_folder.getAbsolutePath(), raw_element_name_str, element_name_str);
217 metadata_DOM_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, element_name_str);
218 element_name_str = null;
219 element_ew.inc();
220
221 // We can now build the hfile properly is necessary
222 File hfile_file = (File) get(raw_element_name_str);
223 if(hfile_file != null) {
224 ///ystem.err.println("Processing the mapping for: " + raw_element_name_str);
225 // Remove the current mapping
226 remove(raw_element_name_str);
227 // Create the HFile
228 try {
229 HFile hfile = new HFile(element_ew, hfile_file);
230 // Store the mapping
231 ///ystem.err.println("Adding a hfile mapping for: " + element_ew);
232 put(element_ew, hfile);
233 // Now update the value
234 String raw_value_str = MSMUtils.getValue(metadata_DOM_element);
235 String new_value_str = hfile.getFullValue(raw_value_str);
236 MSMUtils.setValue(metadata_DOM_element, new_value_str);
237 raw_value_str = null;
238 new_value_str = null;
239 hfile = null;
240 }
241 catch (Exception exception) {
242 Gatherer.println("***** Exception in org.greenstone.gatherer.msm.LegacyCollectionImporter *****");
243 Gatherer.printStackTrace(exception);
244 }
245 }
246 element_ew = null;
247 }
248 // Choosen to ignore the element
249 else {
250 Gatherer.c_man.getCollection().msm.profiler.addAction(collection_folder.getAbsolutePath(), raw_element_name_str, null);
251 // Retrieve the parent node
252 Node parent_DOM_node = metadata_DOM_element.getParentNode();
253 parent_DOM_node.removeChild(metadata_DOM_element);
254 parent_DOM_node = null;
255 }
256 confirmed = true;
257 changed = true;
258 }
259 // Cancel only skips the current metadata.xml file - actually I'll ignore the file on a cancel, as otherwise it will bugger up MetadataXMLFileManager when it starts up
260 else {
261 // Confirm the cancel, informing the user that the metadata.xml will be completely ignored if they continue
262 confirmed = (JOptionPane.showConfirmDialog(Gatherer.g_man, Dictionary.get("MSM.Legacy.Delete_Metadata_XML", file.getAbsolutePath()), Dictionary.get("General.Warning"), JOptionPane.YES_NO_OPTION) == JOptionPane.YES_OPTION);
263 }
264 }
265 }
266 }
267 raw_element_name_str = null;
268 metadata_DOM_element = null;
269 }
270 // If the DOM has saved, write it back out again
271 if(changed) {
272 // First of all we create a backup of the current metadata.xml
273 // I'll start by trying to rename it - but we all know how successful thats been in the past
274 boolean done = false;
275 try {
276 File old_file = new File(file.getAbsolutePath());
277 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
278 // Remove any existing backup
279 if(new_file.exists()) {
280 new_file.delete();
281 }
282 old_file.renameTo(new_file);
283 // Then test if the file has been renamed
284 if(new_file.exists()) {
285 ///ystem.err.println("Rename Gooooood");
286 done = true;
287 }
288 }
289 catch(Exception exception) {
290 ///ystem.err.println("Rename Baaaaaad");
291 }
292
293 // Failing that I'll copy it.
294 if(!done) {
295 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
296 Gatherer.f_man.getQueue().copyFile(file, new_file, null);
297 if(new_file.exists()) {
298 ///ystem.err.println("Copy Goooooood");
299 }
300 else {
301 ///ystem.err.println("Copy Baaaaaaad");
302 }
303 }
304 Utility.export(document, file);
305 }
306 document = null;
307 }
308 }
309 catch(Exception exception) {
310 // Dump a stack trace to debug
311 Gatherer.println("***** Exception in org.greenstone.gatherer.msm.LegacyCollectionImporter *****");
312 Gatherer.printStackTrace(exception);
313 // Then display a message complaining about this file
314 JOptionPane.showMessageDialog(Gatherer.g_man, Dictionary.get("MSM.Legacy.Corrupt_Metadata_XML", file.getAbsolutePath()), Dictionary.get("General.Error"), JOptionPane.ERROR_MESSAGE);
315 }
316 }
317 // Else you may go to the Devil's dam. Yours gifts are so good, heres none will hold you. Their love is not so great, Hortensio, but we may blow our nails together, and fast it fairly out.
318 }
319 }
320
321 private void checkForHierarchy(ElementWrapper element, Element metadata_DOM_element) {
322 // Iterator through the keys in the HashMap looking for a match to our element. Why, oh why, doesn't HashMap use element.equals(element) like its meant to?
323 HFile hfile = null;
324 for(Iterator keys = keySet().iterator(); hfile == null && keys.hasNext(); ) {
325 Object key_object = keys.next();
326 if(key_object instanceof ElementWrapper) {
327 ElementWrapper key = (ElementWrapper) key_object;
328 ///ystem.err.print("Does " + key + " equal " + element + "? ");
329 if(key.equals(element)) {
330 ///ystem.err.println("Yes!");
331 hfile = (HFile) get(key);
332 }
333 key = null;
334 }
335 key_object = null;
336 }
337 if(hfile != null) {
338 String raw_value_str = MSMUtils.getValue(metadata_DOM_element);
339 String new_value_str = hfile.getFullValue(raw_value_str);
340 MSMUtils.setValue(metadata_DOM_element, new_value_str);
341 raw_value_str = null;
342 new_value_str = null;
343 hfile = null;
344 }
345 }
346
347 /** Another basic HFile wrapper. This one expects you to provide an element when you create it, then as it is built it generates the value tree as well. Later it allows you to provide an alias and retrieve the full path string (delimited by pipes) */
348 private class HFile
349 extends HashMap {
350
351 public HFile(ElementWrapper element, File file)
352 throws Exception {
353 super();
354 HashMap index_to_entry = new HashMap();
355 // Read in the hfile, line by line, creating entry mappings
356 FileReader in_filereader = new FileReader(file);
357 BufferedReader in = new BufferedReader(in_filereader);
358 String line = null;
359 while((line = in.readLine()) != null) {
360 CommandTokenizer tokenizer = new CommandTokenizer(line);
361 String alias = tokenizer.nextToken();
362 String index = tokenizer.nextToken();
363 String value = Utility.decodeGreenstone(tokenizer.nextToken());
364 ///ystem.err.println("Read " + index + ", " + alias + ", " + value);
365 if(alias.startsWith("\"") && alias.endsWith("\"") && !alias.equals("\"\"")) {
366 alias = alias.substring(1, alias.length() - 1);
367 }
368 if(value.startsWith("\"") && value.endsWith("\"") && !value.equals("\"\"")) {
369 value = value.substring(1, value.length() - 1);
370 }
371 ///ystem.err.println("Storing:\nindex:" + index + "\nalias:" + alias + "\nvalue:" + value);
372 index_to_entry.put(index, new Entry(alias, value));
373 value = null;
374 alias = null;
375 index = null;
376 tokenizer = null;
377 }
378 in.close();
379 in_filereader.close();
380 in = null;
381 in_filereader = null;
382 // Now iterate through the HashMap creating ValueNodes and mappings as we go
383 for(Iterator keys = index_to_entry.keySet().iterator(); keys.hasNext(); ) {
384 String index = (String) keys.next();
385 Entry entry = (Entry) index_to_entry.get(index);
386 String alias = entry.alias;
387 String value = entry.value;
388 // Chop the last reference off index, as we already have it
389 if(index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
390 index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
391 // Then while theres still futher indexes left, retrieve them
392 while(index.length() > 0) {
393 // Retrieve that value (if any).
394 entry = (Entry) index_to_entry.get(index);
395 if(entry != null) {
396 // Precatenate with the current value separating with a pipe
397 value = entry.value + StaticStrings.PIPE_CHAR + value;
398 }
399 // Then trim the index down
400 if(index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
401 index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
402 }
403 else {
404 index="";
405 }
406 }
407 }
408 // Create the value node
409 GValueModel value_model = Gatherer.c_man.getCollection().msm.getValueTree(element);
410 GValueNode value_node = value_model.addValue(value);
411 // And place the mapping
412 put(alias, value_node);
413 }
414 index_to_entry.clear();
415 index_to_entry = null;
416
417 // Now we rename the old hfile
418 boolean done = false;
419 try {
420 File old_file = new File(file.getAbsolutePath());
421 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
422 // Remove any existing backup
423 if(new_file.exists()) {
424 new_file.delete();
425 }
426 old_file.renameTo(new_file);
427 // Then test if the file has been renamed
428 if(new_file.exists()) {
429 ///ystem.err.println("Rename Gooooood");
430 done = true;
431 }
432 }
433 catch(Exception exception) {
434 ///ystem.err.println("Rename Baaaaaad");
435 }
436
437 // Failing that I'll copy it.
438 if(!done) {
439 File new_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
440 Gatherer.f_man.getQueue().copyFile(file, new_file, null);
441 if(new_file.exists()) {
442 ///ystem.err.println("Copy Goooooood");
443 }
444 else {
445 ///ystem.err.println("Copy Baaaaaaad");
446 }
447 }
448 // Right. Get rid of original
449 file.delete();
450 }
451
452 public String getFullValue(String alias) {
453 ///ystem.err.println("Searching for the value for alias: " + alias);
454 for(Iterator keys = keySet().iterator(); keys.hasNext(); ) {
455 String key = (String) keys.next();
456 if(alias.equals(key)) {
457 GValueNode value_node = (GValueNode) get(key);
458 if(value_node != null) {
459 ///ystem.err.println("Found value: " + value_node.getFullPath(false));
460 return Codec.transform(value_node.getFullPath(false), Codec.TEXT_TO_DOM);
461 }
462 else {
463 ///ystem.err.println("Error!");
464 return alias; // Can't do any better
465 }
466 }
467 }
468 ///ystem.err.println("Error!");
469 return alias;
470 }
471
472 private class Entry {
473 public String alias;
474 public String value;
475 public Entry(String alias, String value) {
476 this.alias = alias;
477 this.value = value;
478 }
479 }
480 }
481}
Note: See TracBrowser for help on using the repository browser.