source: trunk/gli/src/org/greenstone/gatherer/msm/LegacyCollectionImporter.java@ 7338

Last change on this file since 7338 was 7338, checked in by kjdon, 20 years ago

bug when opening a legacy collection with no import dir - now check for this before trying to copy metadata files

  • Property svn:keywords set to Author Date Id Revision
File size: 17.8 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.msm;
28
29
30import java.io.*;
31import java.util.*;
32import org.w3c.dom.*;
33import org.greenstone.gatherer.Gatherer;
34import org.greenstone.gatherer.cdm.Argument;
35import org.greenstone.gatherer.cdm.Classifier;
36import org.greenstone.gatherer.cdm.CollectionDesignManager;
37import org.greenstone.gatherer.msm.ElementWrapper;
38import org.greenstone.gatherer.msm.MetadataSetManager;
39import org.greenstone.gatherer.util.StaticStrings;
40import org.greenstone.gatherer.util.Utility;
41import org.greenstone.gatherer.valuetree.GValueModel;
42
43
44/**
45 * Rewrote almost all this class to fix a variety of bugs.
46 * Importing legacy collections involves three main steps:
47 *
48 * 1. Read the existing metadata.xml files and namespace them, usually by prompting the user
49 * to specify the mapping between old metadata elements and new elements.
50 *
51 * 2. Build complete value trees for the new metadata elements. This ensures that all the
52 * metadata will show up and that the hfiles are written out complete. Building the
53 * value trees involves parsing the old hfiles and processing hierarchical metadata
54 * into GLI format (where '|' is the level separator).
55 *
56 * 3. Fix up the classify commands in the collect.cfg file to specify the new element names.
57 *
58 * @author Michael Dewsnip
59 */
60public class LegacyCollectionImporter
61{
62 private File collection_folder;
63 private String collection_folder_path;
64 private CollectionDesignManager cdm;
65 private MetadataSetManager msm;
66 private HashMap source_metadata_to_hfile_mapping;
67 private boolean cancelled;
68
69
70 public LegacyCollectionImporter(File collection_folder, CollectionDesignManager cdm)
71 {
72 this.collection_folder = collection_folder;
73 this.collection_folder_path = collection_folder.getAbsolutePath();
74 this.cdm = cdm;
75 this.msm = Gatherer.c_man.getCollection().msm;
76
77 // Create a mapping from metadata element to hierarchy classifier
78 source_metadata_to_hfile_mapping = new HashMap();
79 ArrayList hierarchy_classifiers_list = cdm.classifier_manager.getHierarchyClassifiers();
80 for (int i = 0; i < hierarchy_classifiers_list.size(); i++) {
81 Classifier classifier = (Classifier) hierarchy_classifiers_list.get(i);
82 // System.err.println("Hierarchy classifier: " + classifier);
83
84 // Get the element name
85 Argument metadata_name_argument = classifier.getArgument(StaticStrings.METADATA_ARGUMENT);
86 String metadata_name = metadata_name_argument.getValue();
87 // System.err.println("Metadata name: " + metadata_name);
88
89 // Remove the extracted namespace if it has been added
90 if (metadata_name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
91 metadata_name = metadata_name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
92 }
93
94 // Parse the hfile for this Hierarchy classifier
95 Argument hfile_name_argument = classifier.getArgument(StaticStrings.HFILE_ARGUMENT);
96 String hfile_name = hfile_name_argument.getValue();
97
98 File hfile_file = new File(collection_folder, StaticStrings.ETC_FOLDER + File.separator + hfile_name);
99
100 // System.err.println("Checking hfile " + hfile_file + " for " + metadata_name);
101 HFile hfile = new HFile(hfile_file);
102 source_metadata_to_hfile_mapping.put(metadata_name, hfile);
103 }
104 }
105
106
107 // This copies all the existing metadata.xml files into a backup directory
108 public void backupMetadataXMLFiles(File collection_dir)
109 {
110 File import_dir = new File(collection_dir, Utility.IMPORT_DIR);
111 File import_bak_dir = new File(collection_dir, Utility.IMPORT_BAK_DIR);
112 import_bak_dir.mkdir();
113 copyMetadataXMLFiles(import_dir, import_bak_dir);
114 }
115
116
117 private void copyMetadataXMLFiles(File source_dir, File dest_dir)
118 {
119 if (source_dir == null || !source_dir.exists()) {
120 return;
121 }
122 // Find the metadata file in this dir
123 File meta_file = new File(source_dir, "metadata.xml");
124 if (meta_file.exists()) {
125 File new_meta_file = new File(dest_dir, "metadata.xml");
126 try {
127 dest_dir.mkdirs();
128 Gatherer.f_man.getQueue().copyFile(meta_file, new_meta_file, null);
129 if (!new_meta_file.exists()) {
130 throw new Exception("");
131 }
132 }
133 catch (Exception e) {
134 Gatherer.println("Exception: couldn't move the file " + meta_file.getPath() + e.getMessage());
135 }
136 }
137
138 // Now go through child directories
139 File [] children = source_dir.listFiles();
140 for (int i = 0; i < children.length; i++) {
141 File child = children[i];
142 if (child.isDirectory()) {
143 copyMetadataXMLFiles(child, new File(dest_dir, child.getName()));
144 }
145 }
146 }
147
148
149 public void importMetadata()
150 {
151 // Nothing to do if we don't have any metadata sets (apart from extracted) loaded
152 if (msm.getSets().size() <= 1) {
153 System.err.println("No metadata sets!");
154 return;
155 }
156
157 cancelled = false;
158 importMetadata(new File(collection_folder, StaticStrings.IMPORT_FOLDER));
159 }
160
161
162 private void importMetadata(File file)
163 {
164 if (file.isDirectory()) {
165 // Apply recursively to the contents of the directory
166 File[] files = file.listFiles();
167 if (files != null) {
168 for (int i = 0; i < files.length && !cancelled; i++) {
169 importMetadata(files[i]);
170 }
171 }
172
173 return;
174 }
175
176 // We only care about metadata.xml files
177 if (!file.getName().equals(StaticStrings.METADATA_XML)) {
178 return;
179 }
180
181 // Parse the metadata.xml file
182 // System.err.println("Importing metadata from " + file);
183 Document document = Utility.parse(file.getAbsolutePath(), false);
184
185 // Get a list of all the <Metadata> elements in the file, and put them in an array
186 NodeList metadata_elements_list = document.getDocumentElement().getElementsByTagName(StaticStrings.METADATA_ELEMENT);
187 Node[] metadata_elements = new Node[metadata_elements_list.getLength()];
188 for (int i = 0; i < metadata_elements_list.getLength(); i++) {
189 metadata_elements[i] = metadata_elements_list.item(i);
190 // System.err.println("Metadata element: " + MSMUtils.getValue(metadata_elements[i]));
191 // System.err.println("Metadata element parent: " + metadata_elements[i].getParentNode().getNodeName());
192 }
193
194 // Now, for each metadata element...
195 for (int i = 0; i < metadata_elements.length; i++) {
196 Element source_element = (Element) metadata_elements[i];
197 String source_element_name = source_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
198 // System.err.println("Source element name: " + source_element_name);
199
200 // Check if there is a profile already set up for this element
201 if (msm.profiler.containsAction(collection_folder_path, source_element_name)) {
202 String target_element_name = msm.profiler.getAction(collection_folder_path, source_element_name);
203
204 // Update the metadata element and move onto the next one
205 if (target_element_name != null) {
206 updateMetadataElement(source_element, target_element_name);
207 }
208 else {
209 // Element has been ignored, so remove it
210 source_element.getParentNode().removeChild(source_element);
211 }
212
213 continue;
214 }
215
216 // No profile, so check if the element is already in the metadata set
217 ElementWrapper target_element = msm.getElement(source_element_name, true);
218 if (target_element != null) {
219 String target_element_name = target_element.getName();
220
221 // Update the metadata element and move onto the next one
222 updateMetadataElement(source_element, target_element_name);
223 continue;
224 }
225
226 // We must ask the user how to process this metadata element
227 target_element = msm.prompt.selectElement(source_element_name);
228 if (msm.prompt.wasDialogCancelled()) {
229 cancelled = true;
230 return;
231 }
232
233 if (target_element == null) {
234 // The user has chosen to ignore this element, so remove it
235 source_element.getParentNode().removeChild(source_element);
236
237 // Add the user's choice to the profile for this collection
238 msm.profiler.addAction(collection_folder_path, source_element_name, null);
239 }
240 else {
241 // Replace the old metadata element name with the new one
242 String target_element_name = target_element.getName();
243
244 // Update the metadata element
245 updateMetadataElement(source_element, target_element_name);
246
247 // Add the user's choice to the profile for this collection
248 msm.profiler.addAction(collection_folder_path, source_element_name, target_element_name);
249 }
250 }
251
252 // ----------------------------------------------------------------------------------
253 // HACK CODE ADDED IN AT VERY LAST MINUTE FOR REWRITING METADATA.XML FILES
254
255 // Get a list of all the <FileSet> elements in the file, and put them in an array
256 NodeList fileset_elements_list = document.getDocumentElement().getElementsByTagName(MetadataXMLFile.FILESET_ELEMENT);
257 Node[] fileset_elements = new Node[fileset_elements_list.getLength()];
258 for (int i = 0; i < fileset_elements_list.getLength(); i++) {
259 fileset_elements[i] = fileset_elements_list.item(i);
260 }
261
262 // For each fileset element...
263 for (int i = 0; i < fileset_elements.length; i++) {
264 Node fileset_node = fileset_elements[i];
265 NodeList fileset_children = fileset_node.getChildNodes();
266 for (int j = 0; j < fileset_children.getLength(); j++) {
267 Node fileset_child = fileset_children.item(j);
268 if (fileset_child.getNodeName().equals(MetadataXMLFile.FILENAME_ELEMENT)) {
269 String child_filename = MSMUtils.getValue(fileset_child);
270 File child_file = new File(file.getParentFile(), child_filename);
271 if (child_file.isDirectory()) {
272 MetadataXMLFile child_metadata_xml_file = new MetadataXMLFile();
273 Document child_metadata_xml_file_document = child_metadata_xml_file.getDocument();
274
275 fileset_node = fileset_node.getParentNode().removeChild(fileset_node);
276
277 // Change the filename value to .*
278 MSMUtils.setValue((Element) fileset_child, ".*");
279
280 Node child_fileset_node = child_metadata_xml_file_document.importNode(fileset_node, true);
281 child_metadata_xml_file_document.getDocumentElement().appendChild(child_fileset_node);
282 Utility.export(child_metadata_xml_file_document, new File(child_file, StaticStrings.METADATA_XML));
283 }
284 break;
285 }
286 }
287 }
288
289 // END HACK CODE
290 // ----------------------------------------------------------------------------------
291
292 // Write the modified metadata.xml file back out
293 Utility.export(document, file);
294 }
295
296
297 private void updateMetadataElement(Element metadata_element, String new_element_name)
298 {
299 String source_element_name = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
300 HFile hfile = (HFile) source_metadata_to_hfile_mapping.get(source_element_name);
301
302 // Get the value of this metadata element
303 String element_value = MSMUtils.getValue(metadata_element);
304 if (hfile != null) {
305 // Map to the full value
306 String full_element_value = hfile.getFullValue(element_value);
307 if (full_element_value != null) {
308 element_value = full_element_value;
309 }
310 }
311
312 // Update the name and value of the metadata element
313 metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, new_element_name);
314 MSMUtils.setValue(metadata_element, element_value);
315
316 // Add the value of this metadata element to the value tree
317 GValueModel value_model = msm.getValueTree(msm.getElement(new_element_name, true));
318 value_model.addValue(element_value);
319 }
320
321
322 public void updateClassifiers()
323 {
324 // Update the metadata elements in each of the classifiers
325 for (int i = 0; i < cdm.classifier_manager.getSize(); i++) {
326 Classifier classifier = cdm.classifier_manager.getClassifier(i);
327 // System.err.println("Classifier: " + classifier);
328
329 // Update the "-metadata" value
330 mapClassifierArgumentToNewValue(classifier, StaticStrings.METADATA_ARGUMENT);
331
332 // Update the "-sort" value
333 mapClassifierArgumentToNewValue(classifier, "-sort");
334
335 // With Hierarchy classifiers, update the hfile arguments
336 if (classifier.getName().equalsIgnoreCase(StaticStrings.HIERARCHY_CLASSIFIER)) {
337 // Update the "-hfile" value
338 Argument hfile_argument = classifier.getArgument(StaticStrings.HFILE_ARGUMENT);
339 String hfile_value = hfile_argument.getValue();
340
341 // Find the source metadata element
342 Iterator keys = source_metadata_to_hfile_mapping.keySet().iterator();
343 while (keys.hasNext()) {
344 String source_metadata = (String) keys.next();
345 String hfile_name = ((HFile) source_metadata_to_hfile_mapping.get(source_metadata)).hfile_name;
346 if (hfile_name.equals(hfile_value)) {
347 // Update the metadata value to the new (namespaced) one
348 if (msm.profiler.containsAction(collection_folder_path, source_metadata)) {
349 String target_value = msm.profiler.getAction(collection_folder_path, source_metadata);
350 hfile_argument.setValue(target_value + ".txt");
351 }
352
353 break;
354 }
355 }
356 }
357
358 // System.err.println("Classifier (after): " + classifier);
359 }
360 }
361
362
363 private void mapClassifierArgumentToNewValue(Classifier classifier, String argument_name)
364 {
365 Argument argument = classifier.getArgument(argument_name);
366 if (argument == null) {
367 // there is no such argument
368 return;
369 }
370 String value = argument.getValue();
371 // System.err.println("Value: " + value);
372
373 // Remove the extracted namespace if it has been added
374 if (value.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
375 value = value.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
376 }
377
378 // Update the metadata value to the new (namespaced) one
379 if (msm.profiler.containsAction(collection_folder_path, value)) {
380 String target_value = msm.profiler.getAction(collection_folder_path, value);
381 argument.setValue(target_value);
382 }
383 }
384
385
386 /** Another basic HFile wrapper. This one expects you to provide an element when you create it, then as it is built it generates the value tree as well. Later it allows you to provide an alias and retrieve the full path string (delimited by pipes) */
387 private class HFile
388 {
389 public String hfile_name;
390 private HashMap index_to_entry_mapping;
391 private HashMap alias_to_value_mapping;
392
393 public HFile(File file)
394 {
395 hfile_name = file.getName();
396 index_to_entry_mapping = new HashMap();
397 alias_to_value_mapping = new HashMap();
398
399 try {
400 // Read in the hfile, line by line, creating entry mappings
401 //FileReader file_reader = new FileReader(file);
402 //BufferedReader buffered_reader = new BufferedReader(file_reader);
403 BufferedReader buffered_reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
404
405 String line = null;
406 while ((line = buffered_reader.readLine()) != null) {
407 // Read alias
408 String alias = "";
409 int alias_end;
410 if (line.indexOf("\"") == 0) {
411 alias_end = line.indexOf("\"", 1);
412 alias = line.substring(1, alias_end);
413 }
414 else {
415 alias_end = line.indexOf(" ", 1);
416 alias = line.substring(0, alias_end);
417 }
418 // System.err.println("Alias: " + alias);
419
420 int value_start = line.indexOf("\"", alias_end + 1);
421 int value_end = line.indexOf("\"", value_start + 1);
422 String value = line.substring(value_start + 1, value_end);
423 // System.err.println("Value: " + value);
424
425 // if (!alias.equals(value)) {
426 // System.err.println("Alias (" + alias + ") and value (" + value + ") differ!");
427 // }
428
429 String index = line.substring(alias_end + 1, value_start).trim();
430 // System.err.println("Index: " + index);
431
432 index_to_entry_mapping.put(index, new Entry(alias, value));
433 }
434
435 buffered_reader.close();
436 }
437 catch (Exception ex) {
438 System.err.println("Exception reading hfile " + file);
439 ex.printStackTrace();
440 }
441
442 Iterator index_keys = index_to_entry_mapping.keySet().iterator();
443 while (index_keys.hasNext()) {
444 String index = (String) index_keys.next();
445 String alias = ((Entry) index_to_entry_mapping.get(index)).alias;
446 String value = ((Entry) index_to_entry_mapping.get(index)).value;
447
448 // Chop the last reference off index, as we already have it
449 if (index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
450 index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
451 // Then while there are still futher indexes left, retrieve them
452 while (index.length() > 0) {
453 // Retrieve that value (if any).
454 Entry entry = (Entry) index_to_entry_mapping.get(index);
455 if (entry != null) {
456 // Precatenate with the current value separating with a pipe
457 value = entry.value + StaticStrings.PIPE_CHAR + value;
458 }
459 // Then trim the index down
460 if (index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
461 index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
462 }
463 else {
464 index = "";
465 }
466 }
467 }
468
469 alias_to_value_mapping.put(alias, value);
470 }
471
472 index_to_entry_mapping.clear();
473 }
474
475
476 public String getFullValue(String alias)
477 {
478 return (String) alias_to_value_mapping.get(alias);
479 }
480
481
482 private class Entry
483 {
484 public String alias;
485 public String value;
486
487 public Entry(String alias, String value) {
488 this.alias = alias;
489 this.value = value;
490 }
491 }
492 }
493}
Note: See TracBrowser for help on using the repository browser.