source: trunk/gli/src/org/greenstone/gatherer/msm/LegacyCollectionImporter.java@ 8236

Last change on this file since 8236 was 8236, checked in by mdewsnip, 20 years ago

Replaced all Gatherer.print* with DebugStream.print*.

  • Property svn:keywords set to Author Date Id Revision
File size: 17.8 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.msm;
28
29
30import java.io.*;
31import java.util.*;
32import org.w3c.dom.*;
33import org.greenstone.gatherer.DebugStream;
34import org.greenstone.gatherer.Gatherer;
35import org.greenstone.gatherer.cdm.Argument;
36import org.greenstone.gatherer.cdm.Classifier;
37import org.greenstone.gatherer.cdm.CollectionDesignManager;
38import org.greenstone.gatherer.msm.ElementWrapper;
39import org.greenstone.gatherer.msm.MetadataSetManager;
40import org.greenstone.gatherer.util.StaticStrings;
41import org.greenstone.gatherer.util.Utility;
42import org.greenstone.gatherer.valuetree.GValueModel;
43
44
45/**
46 * Rewrote almost all this class to fix a variety of bugs.
47 * Importing legacy collections involves three main steps:
48 *
49 * 1. Read the existing metadata.xml files and namespace them, usually by prompting the user
50 * to specify the mapping between old metadata elements and new elements.
51 *
52 * 2. Build complete value trees for the new metadata elements. This ensures that all the
53 * metadata will show up and that the hfiles are written out complete. Building the
54 * value trees involves parsing the old hfiles and processing hierarchical metadata
55 * into GLI format (where '|' is the level separator).
56 *
57 * 3. Fix up the classify commands in the collect.cfg file to specify the new element names.
58 *
59 * @author Michael Dewsnip
60 */
61public class LegacyCollectionImporter
62{
63 private File collection_folder;
64 private String collection_folder_path;
65 private CollectionDesignManager cdm;
66 private MetadataSetManager msm;
67 private HashMap source_metadata_to_hfile_mapping;
68 private boolean cancelled;
69
70
71 public LegacyCollectionImporter(File collection_folder, CollectionDesignManager cdm)
72 {
73 this.collection_folder = collection_folder;
74 this.collection_folder_path = collection_folder.getAbsolutePath();
75 this.cdm = cdm;
76 this.msm = Gatherer.c_man.getCollection().msm;
77
78 // Create a mapping from metadata element to hierarchy classifier
79 source_metadata_to_hfile_mapping = new HashMap();
80 ArrayList hierarchy_classifiers_list = cdm.classifier_manager.getHierarchyClassifiers();
81 for (int i = 0; i < hierarchy_classifiers_list.size(); i++) {
82 Classifier classifier = (Classifier) hierarchy_classifiers_list.get(i);
83 // System.err.println("Hierarchy classifier: " + classifier);
84
85 // Get the element name
86 Argument metadata_name_argument = classifier.getArgument(StaticStrings.METADATA_ARGUMENT);
87 String metadata_name = metadata_name_argument.getValue();
88 // System.err.println("Metadata name: " + metadata_name);
89
90 // Remove the extracted namespace if it has been added
91 if (metadata_name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
92 metadata_name = metadata_name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
93 }
94
95 // Parse the hfile for this Hierarchy classifier
96 Argument hfile_name_argument = classifier.getArgument(StaticStrings.HFILE_ARGUMENT);
97 String hfile_name = hfile_name_argument.getValue();
98
99 File hfile_file = new File(collection_folder, StaticStrings.ETC_FOLDER + File.separator + hfile_name);
100
101 // System.err.println("Checking hfile " + hfile_file + " for " + metadata_name);
102 HFile hfile = new HFile(hfile_file);
103 source_metadata_to_hfile_mapping.put(metadata_name, hfile);
104 }
105 }
106
107
108 // This copies all the existing metadata.xml files into a backup directory
109 public void backupMetadataXMLFiles(File collection_dir)
110 {
111 File import_dir = new File(collection_dir, Utility.IMPORT_DIR);
112 File import_bak_dir = new File(collection_dir, Utility.IMPORT_BAK_DIR);
113 import_bak_dir.mkdir();
114 copyMetadataXMLFiles(import_dir, import_bak_dir);
115 }
116
117
118 private void copyMetadataXMLFiles(File source_dir, File dest_dir)
119 {
120 if (source_dir == null || !source_dir.exists()) {
121 return;
122 }
123 // Find the metadata file in this dir
124 File meta_file = new File(source_dir, "metadata.xml");
125 if (meta_file.exists()) {
126 File new_meta_file = new File(dest_dir, "metadata.xml");
127 try {
128 dest_dir.mkdirs();
129 Gatherer.f_man.getQueue().copyFile(meta_file, new_meta_file, null);
130 if (!new_meta_file.exists()) {
131 throw new Exception("");
132 }
133 }
134 catch (Exception e) {
135 DebugStream.println("Exception: couldn't move the file " + meta_file.getPath() + e.getMessage());
136 }
137 }
138
139 // Now go through child directories
140 File [] children = source_dir.listFiles();
141 for (int i = 0; i < children.length; i++) {
142 File child = children[i];
143 if (child.isDirectory()) {
144 copyMetadataXMLFiles(child, new File(dest_dir, child.getName()));
145 }
146 }
147 }
148
149
150 public void importMetadata()
151 {
152 // Nothing to do if we don't have any metadata sets (apart from extracted) loaded
153 if (msm.getSets().size() <= 1) {
154 System.err.println("No metadata sets!");
155 return;
156 }
157
158 cancelled = false;
159 importMetadata(new File(collection_folder, StaticStrings.IMPORT_FOLDER));
160 }
161
162
163 private void importMetadata(File file)
164 {
165 if (file.isDirectory()) {
166 // Apply recursively to the contents of the directory
167 File[] files = file.listFiles();
168 if (files != null) {
169 for (int i = 0; i < files.length && !cancelled; i++) {
170 importMetadata(files[i]);
171 }
172 }
173
174 return;
175 }
176
177 // We only care about metadata.xml files
178 if (!file.getName().equals(StaticStrings.METADATA_XML)) {
179 return;
180 }
181
182 // Parse the metadata.xml file
183 // System.err.println("Importing metadata from " + file);
184 Document document = Utility.parse(file.getAbsolutePath(), false);
185
186 // Get a list of all the <Metadata> elements in the file, and put them in an array
187 NodeList metadata_elements_list = document.getDocumentElement().getElementsByTagName(StaticStrings.METADATA_ELEMENT);
188 Node[] metadata_elements = new Node[metadata_elements_list.getLength()];
189 for (int i = 0; i < metadata_elements_list.getLength(); i++) {
190 metadata_elements[i] = metadata_elements_list.item(i);
191 // System.err.println("Metadata element: " + MSMUtils.getValue(metadata_elements[i]));
192 // System.err.println("Metadata element parent: " + metadata_elements[i].getParentNode().getNodeName());
193 }
194
195 // Now, for each metadata element...
196 for (int i = 0; i < metadata_elements.length; i++) {
197 Element source_element = (Element) metadata_elements[i];
198 String source_element_name = source_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
199 // System.err.println("Source element name: " + source_element_name);
200
201 // Check if there is a profile already set up for this element
202 if (msm.profiler.containsAction(collection_folder_path, source_element_name)) {
203 String target_element_name = msm.profiler.getAction(collection_folder_path, source_element_name);
204
205 // Update the metadata element and move onto the next one
206 if (target_element_name != null) {
207 updateMetadataElement(source_element, target_element_name);
208 }
209 else {
210 // Element has been ignored, so remove it
211 source_element.getParentNode().removeChild(source_element);
212 }
213
214 continue;
215 }
216
217 // No profile, so check if the element is already in the metadata set
218 ElementWrapper target_element = msm.getElement(source_element_name, true);
219 if (target_element != null) {
220 String target_element_name = target_element.getName();
221
222 // Update the metadata element and move onto the next one
223 updateMetadataElement(source_element, target_element_name);
224 continue;
225 }
226
227 // We must ask the user how to process this metadata element
228 target_element = msm.prompt.selectElement(source_element_name);
229 if (msm.prompt.wasDialogCancelled()) {
230 cancelled = true;
231 return;
232 }
233
234 if (target_element == null) {
235 // The user has chosen to ignore this element, so remove it
236 source_element.getParentNode().removeChild(source_element);
237
238 // Add the user's choice to the profile for this collection
239 msm.profiler.addAction(collection_folder_path, source_element_name, null);
240 }
241 else {
242 // Replace the old metadata element name with the new one
243 String target_element_name = target_element.getName();
244
245 // Update the metadata element
246 updateMetadataElement(source_element, target_element_name);
247
248 // Add the user's choice to the profile for this collection
249 msm.profiler.addAction(collection_folder_path, source_element_name, target_element_name);
250 }
251 }
252
253 // ----------------------------------------------------------------------------------
254 // HACK CODE ADDED IN AT VERY LAST MINUTE FOR REWRITING METADATA.XML FILES
255
256 // Get a list of all the <FileSet> elements in the file, and put them in an array
257 NodeList fileset_elements_list = document.getDocumentElement().getElementsByTagName(MetadataXMLFile.FILESET_ELEMENT);
258 Node[] fileset_elements = new Node[fileset_elements_list.getLength()];
259 for (int i = 0; i < fileset_elements_list.getLength(); i++) {
260 fileset_elements[i] = fileset_elements_list.item(i);
261 }
262
263 // For each fileset element...
264 for (int i = 0; i < fileset_elements.length; i++) {
265 Node fileset_node = fileset_elements[i];
266 NodeList fileset_children = fileset_node.getChildNodes();
267 for (int j = 0; j < fileset_children.getLength(); j++) {
268 Node fileset_child = fileset_children.item(j);
269 if (fileset_child.getNodeName().equals(MetadataXMLFile.FILENAME_ELEMENT)) {
270 String child_filename = MSMUtils.getValue(fileset_child);
271 File child_file = new File(file.getParentFile(), child_filename);
272 if (child_file.isDirectory()) {
273 MetadataXMLFile child_metadata_xml_file = new MetadataXMLFile();
274 Document child_metadata_xml_file_document = child_metadata_xml_file.getDocument();
275
276 fileset_node = fileset_node.getParentNode().removeChild(fileset_node);
277
278 // Change the filename value to .*
279 MSMUtils.setValue((Element) fileset_child, ".*");
280
281 Node child_fileset_node = child_metadata_xml_file_document.importNode(fileset_node, true);
282 child_metadata_xml_file_document.getDocumentElement().appendChild(child_fileset_node);
283 Utility.export(child_metadata_xml_file_document, new File(child_file, StaticStrings.METADATA_XML));
284 }
285 break;
286 }
287 }
288 }
289
290 // END HACK CODE
291 // ----------------------------------------------------------------------------------
292
293 // Write the modified metadata.xml file back out
294 Utility.export(document, file);
295 }
296
297
298 private void updateMetadataElement(Element metadata_element, String new_element_name)
299 {
300 String source_element_name = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
301 HFile hfile = (HFile) source_metadata_to_hfile_mapping.get(source_element_name);
302
303 // Get the value of this metadata element
304 String element_value = MSMUtils.getValue(metadata_element);
305 if (hfile != null) {
306 // Map to the full value
307 String full_element_value = hfile.getFullValue(element_value);
308 if (full_element_value != null) {
309 element_value = full_element_value;
310 }
311 }
312
313 // Update the name and value of the metadata element
314 metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, new_element_name);
315 MSMUtils.setValue(metadata_element, element_value);
316
317 // Add the value of this metadata element to the value tree
318 GValueModel value_model = msm.getValueTree(msm.getElement(new_element_name, true));
319 value_model.addValue(element_value);
320 }
321
322
323 public void updateClassifiers()
324 {
325 // Update the metadata elements in each of the classifiers
326 for (int i = 0; i < cdm.classifier_manager.getSize(); i++) {
327 Classifier classifier = cdm.classifier_manager.getClassifier(i);
328 // System.err.println("Classifier: " + classifier);
329
330 // Update the "-metadata" value
331 mapClassifierArgumentToNewValue(classifier, StaticStrings.METADATA_ARGUMENT);
332
333 // Update the "-sort" value
334 mapClassifierArgumentToNewValue(classifier, "-sort");
335
336 // With Hierarchy classifiers, update the hfile arguments
337 if (classifier.getName().equalsIgnoreCase(StaticStrings.HIERARCHY_CLASSIFIER)) {
338 // Update the "-hfile" value
339 Argument hfile_argument = classifier.getArgument(StaticStrings.HFILE_ARGUMENT);
340 String hfile_value = hfile_argument.getValue();
341
342 // Find the source metadata element
343 Iterator keys = source_metadata_to_hfile_mapping.keySet().iterator();
344 while (keys.hasNext()) {
345 String source_metadata = (String) keys.next();
346 String hfile_name = ((HFile) source_metadata_to_hfile_mapping.get(source_metadata)).hfile_name;
347 if (hfile_name.equals(hfile_value)) {
348 // Update the metadata value to the new (namespaced) one
349 if (msm.profiler.containsAction(collection_folder_path, source_metadata)) {
350 String target_value = msm.profiler.getAction(collection_folder_path, source_metadata);
351 hfile_argument.setValue(target_value + ".txt");
352 }
353
354 break;
355 }
356 }
357 }
358
359 // System.err.println("Classifier (after): " + classifier);
360 }
361 }
362
363
364 private void mapClassifierArgumentToNewValue(Classifier classifier, String argument_name)
365 {
366 Argument argument = classifier.getArgument(argument_name);
367 if (argument == null) {
368 // there is no such argument
369 return;
370 }
371 String value = argument.getValue();
372 // System.err.println("Value: " + value);
373
374 // Remove the extracted namespace if it has been added
375 if (value.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
376 value = value.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
377 }
378
379 // Update the metadata value to the new (namespaced) one
380 if (msm.profiler.containsAction(collection_folder_path, value)) {
381 String target_value = msm.profiler.getAction(collection_folder_path, value);
382 argument.setValue(target_value);
383 }
384 }
385
386
387 /** Another basic HFile wrapper. This one expects you to provide an element when you create it, then as it is built it generates the value tree as well. Later it allows you to provide an alias and retrieve the full path string (delimited by pipes) */
388 private class HFile
389 {
390 public String hfile_name;
391 private HashMap index_to_entry_mapping;
392 private HashMap alias_to_value_mapping;
393
394 public HFile(File file)
395 {
396 hfile_name = file.getName();
397 index_to_entry_mapping = new HashMap();
398 alias_to_value_mapping = new HashMap();
399
400 try {
401 // Read in the hfile, line by line, creating entry mappings
402 //FileReader file_reader = new FileReader(file);
403 //BufferedReader buffered_reader = new BufferedReader(file_reader);
404 BufferedReader buffered_reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
405
406 String line = null;
407 while ((line = buffered_reader.readLine()) != null) {
408 // Read alias
409 String alias = "";
410 int alias_end;
411 if (line.indexOf("\"") == 0) {
412 alias_end = line.indexOf("\"", 1);
413 alias = line.substring(1, alias_end);
414 }
415 else {
416 alias_end = line.indexOf(" ", 1);
417 alias = line.substring(0, alias_end);
418 }
419 // System.err.println("Alias: " + alias);
420
421 int value_start = line.indexOf("\"", alias_end + 1);
422 int value_end = line.indexOf("\"", value_start + 1);
423 String value = line.substring(value_start + 1, value_end);
424 // System.err.println("Value: " + value);
425
426 // if (!alias.equals(value)) {
427 // System.err.println("Alias (" + alias + ") and value (" + value + ") differ!");
428 // }
429
430 String index = line.substring(alias_end + 1, value_start).trim();
431 // System.err.println("Index: " + index);
432
433 index_to_entry_mapping.put(index, new Entry(alias, value));
434 }
435
436 buffered_reader.close();
437 }
438 catch (Exception ex) {
439 System.err.println("Exception reading hfile " + file);
440 ex.printStackTrace();
441 }
442
443 Iterator index_keys = index_to_entry_mapping.keySet().iterator();
444 while (index_keys.hasNext()) {
445 String index = (String) index_keys.next();
446 String alias = ((Entry) index_to_entry_mapping.get(index)).alias;
447 String value = ((Entry) index_to_entry_mapping.get(index)).value;
448
449 // Chop the last reference off index, as we already have it
450 if (index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
451 index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
452 // Then while there are still futher indexes left, retrieve them
453 while (index.length() > 0) {
454 // Retrieve that value (if any).
455 Entry entry = (Entry) index_to_entry_mapping.get(index);
456 if (entry != null) {
457 // Precatenate with the current value separating with a pipe
458 value = entry.value + StaticStrings.PIPE_CHAR + value;
459 }
460 // Then trim the index down
461 if (index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
462 index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
463 }
464 else {
465 index = "";
466 }
467 }
468 }
469
470 alias_to_value_mapping.put(alias, value);
471 }
472
473 index_to_entry_mapping.clear();
474 }
475
476
477 public String getFullValue(String alias)
478 {
479 return (String) alias_to_value_mapping.get(alias);
480 }
481
482
483 private class Entry
484 {
485 public String alias;
486 public String value;
487
488 public Entry(String alias, String value) {
489 this.alias = alias;
490 this.value = value;
491 }
492 }
493 }
494}
Note: See TracBrowser for help on using the repository browser.