source: trunk/gli/src/org/greenstone/gatherer/msm/LegacyCollectionImporter.java@ 7097

Last change on this file since 7097 was 7097, checked in by mdewsnip, 20 years ago

Fixed the way the original metadata.xml files are backed up.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.9 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.msm;
28
29
30import java.io.*;
31import java.util.*;
32import org.w3c.dom.*;
33import org.greenstone.gatherer.Gatherer;
34import org.greenstone.gatherer.cdm.Argument;
35import org.greenstone.gatherer.cdm.Classifier;
36import org.greenstone.gatherer.cdm.CollectionDesignManager;
37import org.greenstone.gatherer.msm.ElementWrapper;
38import org.greenstone.gatherer.msm.MetadataSetManager;
39import org.greenstone.gatherer.util.StaticStrings;
40import org.greenstone.gatherer.util.Utility;
41import org.greenstone.gatherer.valuetree.GValueModel;
42
43
44/**
45 * Rewrote almost all this class to fix a variety of bugs.
46 * Importing legacy collections involves three main steps:
47 *
48 * 1. Read the existing metadata.xml files and namespace them, usually by prompting the user
49 * to specify the mapping between old metadata elements and new elements.
50 *
51 * 2. Build complete value trees for the new metadata elements. This ensures that all the
52 * metadata will show up and that the hfiles are written out complete. Building the
53 * value trees involves parsing the old hfiles and processing hierarchical metadata
54 * into GLI format (where '|' is the level separator).
55 *
56 * 3. Fix up the classify commands in the collect.cfg file to specify the new element names.
57 *
58 * @author Michael Dewsnip
59 */
60public class LegacyCollectionImporter
61{
62 private File collection_folder;
63 private String collection_folder_path;
64 private CollectionDesignManager cdm;
65 private MetadataSetManager msm;
66 private HashMap source_metadata_to_hfile_mapping;
67 private boolean cancelled;
68
69
70 public LegacyCollectionImporter(File collection_folder, CollectionDesignManager cdm)
71 {
72 this.collection_folder = collection_folder;
73 this.collection_folder_path = collection_folder.getAbsolutePath();
74 this.cdm = cdm;
75 this.msm = Gatherer.c_man.getCollection().msm;
76
77 // Create a mapping from metadata element to hierarchy classifier
78 source_metadata_to_hfile_mapping = new HashMap();
79 ArrayList hierarchy_classifiers_list = cdm.classifier_manager.getHierarchyClassifiers();
80 for (int i = 0; i < hierarchy_classifiers_list.size(); i++) {
81 Classifier classifier = (Classifier) hierarchy_classifiers_list.get(i);
82 // System.err.println("Hierarchy classifier: " + classifier);
83
84 // Get the element name
85 Argument metadata_name_argument = classifier.getArgument(StaticStrings.METADATA_ARGUMENT);
86 String metadata_name = metadata_name_argument.getValue();
87 // System.err.println("Metadata name: " + metadata_name);
88
89 // Remove the extracted namespace if it has been added
90 if (metadata_name.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
91 metadata_name = metadata_name.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
92 }
93
94 // Parse the hfile for this Hierarchy classifier
95 Argument hfile_name_argument = classifier.getArgument(StaticStrings.HFILE_ARGUMENT);
96 String hfile_name = hfile_name_argument.getValue();
97
98 File hfile_file = new File(collection_folder, StaticStrings.ETC_FOLDER + File.separator + hfile_name);
99
100 // System.err.println("Checking hfile " + hfile_file + " for " + metadata_name);
101 HFile hfile = new HFile(hfile_file);
102 source_metadata_to_hfile_mapping.put(metadata_name, hfile);
103 }
104 }
105
106
107 public void importMetadata()
108 {
109 // Nothing to do if we don't have any metadata sets (apart from extracted) loaded
110 if (msm.getSets().size() <= 1) {
111 System.err.println("No metadata sets!");
112 return;
113 }
114
115 cancelled = false;
116 importMetadata(new File(collection_folder, StaticStrings.IMPORT_FOLDER));
117 }
118
119
120 private void importMetadata(File file)
121 {
122 if (file.isDirectory()) {
123 // Apply recursively to the contents of the directory
124 File[] files = file.listFiles();
125 if (files != null) {
126 for (int i = 0; i < files.length && !cancelled; i++) {
127 importMetadata(files[i]);
128 }
129 }
130
131 return;
132 }
133
134 // We only care about metadata.xml files
135 if (!file.getName().equals(StaticStrings.METADATA_XML)) {
136 return;
137 }
138
139 // First, copy the original file to make a backup
140 File backup_file = new File(file.getParentFile(), StaticStrings.METADATA_BAK);
141 try {
142 Gatherer.f_man.getQueue().copyFile(file, backup_file, null);
143 if (!backup_file.exists()) {
144 throw new Error();
145 }
146 }
147 catch (Exception ex) {
148 System.err.println("Exception occurred while importing legacy collection.");
149 System.err.println("Error: Could not backup metadata.xml file to " + backup_file);
150 ex.printStackTrace();
151 System.err.println("Aborting legacy collection import.");
152 cancelled = true;
153 return;
154 }
155
156 // Parse the metadata.xml file
157 // System.err.println("Importing metadata from " + file);
158 Document document = Utility.parse(file.getAbsolutePath(), false);
159
160 // Get a list of all the <Metadata> elements in the file, and put them in an array
161 NodeList metadata_elements_list = document.getDocumentElement().getElementsByTagName(StaticStrings.METADATA_ELEMENT);
162 Node[] metadata_elements = new Node[metadata_elements_list.getLength()];
163 for (int i = 0; i < metadata_elements_list.getLength(); i++) {
164 metadata_elements[i] = metadata_elements_list.item(i);
165 }
166
167 // Now, for each metadata element...
168 for (int i = 0; i < metadata_elements.length; i++) {
169 Element source_element = (Element) metadata_elements[i];
170 String source_element_name = source_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
171 // System.err.println("Source element name: " + source_element_name);
172
173 // Check if there is a profile already set up for this element
174 if (msm.profiler.containsAction(collection_folder_path, source_element_name)) {
175 String target_element_name = msm.profiler.getAction(collection_folder_path, source_element_name);
176
177 // Update the metadata element and move onto the next one
178 if (target_element_name != null) {
179 updateMetadataElement(source_element, target_element_name);
180 }
181 else {
182 // Element has been ignored, so remove it
183 source_element.getParentNode().removeChild(source_element);
184 }
185
186 continue;
187 }
188
189 // No profile, so check if the element is already in the metadata set
190 ElementWrapper target_element = msm.getElement(source_element_name, true);
191 if (target_element != null) {
192 String target_element_name = target_element.getName();
193
194 // Update the metadata element and move onto the next one
195 updateMetadataElement(source_element, target_element_name);
196 continue;
197 }
198
199 // We must ask the user how to process this metadata element
200 target_element = msm.prompt.selectElement(source_element_name);
201 if (msm.prompt.wasDialogCancelled()) {
202 cancelled = true;
203 return;
204 }
205
206 if (target_element == null) {
207 // The user has chosen to ignore this element, so remove it
208 source_element.getParentNode().removeChild(source_element);
209
210 // Add the user's choice to the profile for this collection
211 msm.profiler.addAction(collection_folder_path, source_element_name, null);
212 }
213 else {
214 // Replace the old metadata element name with the new one
215 String target_element_name = target_element.getName();
216
217 // Update the metadata element
218 updateMetadataElement(source_element, target_element_name);
219
220 // Add the user's choice to the profile for this collection
221 msm.profiler.addAction(collection_folder_path, source_element_name, target_element_name);
222 }
223 }
224
225 // Write the modified metadata.xml file back out
226 Utility.export(document, file);
227 }
228
229
230 private void updateMetadataElement(Element metadata_element, String new_element_name)
231 {
232 String source_element_name = metadata_element.getAttribute(StaticStrings.NAME_ATTRIBUTE);
233 HFile hfile = (HFile) source_metadata_to_hfile_mapping.get(source_element_name);
234
235 // Get the value of this metadata element
236 String element_value = MSMUtils.getValue(metadata_element);
237 if (hfile != null) {
238 // Map to the full value
239 String full_element_value = hfile.getFullValue(element_value);
240 if (full_element_value != null) {
241 element_value = full_element_value;
242 }
243 }
244
245 // Update the name and value of the metadata element
246 metadata_element.setAttribute(StaticStrings.NAME_ATTRIBUTE, new_element_name);
247 MSMUtils.setValue(metadata_element, element_value);
248
249 // Add the value of this metadata element to the value tree
250 GValueModel value_model = msm.getValueTree(msm.getElement(new_element_name, true));
251 value_model.addValue(element_value);
252 }
253
254
255 public void updateClassifiers()
256 {
257 // Update the metadata elements in each of the classifiers
258 for (int i = 0; i < cdm.classifier_manager.getSize(); i++) {
259 Classifier classifier = cdm.classifier_manager.getClassifier(i);
260 // System.err.println("Classifier: " + classifier);
261
262 // Update the "-metadata" value
263 mapClassifierArgumentToNewValue(classifier, StaticStrings.METADATA_ARGUMENT);
264
265 // Update the "-sort" value
266 mapClassifierArgumentToNewValue(classifier, "-sort");
267
268 // With Hierarchy classifiers, update the hfile arguments
269 if (classifier.getName().equalsIgnoreCase(StaticStrings.HIERARCHY_CLASSIFIER)) {
270 // Update the "-hfile" value
271 Argument hfile_argument = classifier.getArgument(StaticStrings.HFILE_ARGUMENT);
272 String hfile_value = hfile_argument.getValue();
273
274 // Find the source metadata element
275 Iterator keys = source_metadata_to_hfile_mapping.keySet().iterator();
276 while (keys.hasNext()) {
277 String source_metadata = (String) keys.next();
278 String hfile_name = ((HFile) source_metadata_to_hfile_mapping.get(source_metadata)).hfile_name;
279 if (hfile_name.equals(hfile_value)) {
280 // Update the metadata value to the new (namespaced) one
281 if (msm.profiler.containsAction(collection_folder_path, source_metadata)) {
282 String target_value = msm.profiler.getAction(collection_folder_path, source_metadata);
283 hfile_argument.setValue(target_value + ".txt");
284 }
285
286 break;
287 }
288 }
289 }
290
291 // System.err.println("Classifier (after): " + classifier);
292 }
293 }
294
295
296 private void mapClassifierArgumentToNewValue(Classifier classifier, String argument_name)
297 {
298 Argument argument = classifier.getArgument(argument_name);
299 String value = argument.getValue();
300 // System.err.println("Value: " + value);
301
302 // Remove the extracted namespace if it has been added
303 if (value.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
304 value = value.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
305 }
306
307 // Update the metadata value to the new (namespaced) one
308 if (msm.profiler.containsAction(collection_folder_path, value)) {
309 String target_value = msm.profiler.getAction(collection_folder_path, value);
310 argument.setValue(target_value);
311 }
312 }
313
314
315 /** Another basic HFile wrapper. This one expects you to provide an element when you create it, then as it is built it generates the value tree as well. Later it allows you to provide an alias and retrieve the full path string (delimited by pipes) */
316 private class HFile
317 {
318 public String hfile_name;
319 private HashMap index_to_entry_mapping;
320 private HashMap alias_to_value_mapping;
321
322 public HFile(File file)
323 {
324 hfile_name = file.getName();
325 index_to_entry_mapping = new HashMap();
326 alias_to_value_mapping = new HashMap();
327
328 try {
329 // Read in the hfile, line by line, creating entry mappings
330 FileReader file_reader = new FileReader(file);
331 BufferedReader buffered_reader = new BufferedReader(file_reader);
332 String line = null;
333 while ((line = buffered_reader.readLine()) != null) {
334 // Read alias
335 String alias = "";
336 int alias_end;
337 if (line.indexOf("\"") == 0) {
338 alias_end = line.indexOf("\"", 1);
339 alias = line.substring(1, alias_end);
340 }
341 else {
342 alias_end = line.indexOf(" ", 1);
343 alias = line.substring(0, alias_end);
344 }
345 // System.err.println("Alias: " + alias);
346
347 int value_start = line.indexOf("\"", alias_end + 1);
348 int value_end = line.indexOf("\"", value_start + 1);
349 String value = line.substring(value_start + 1, value_end);
350 // System.err.println("Value: " + value);
351
352 // if (!alias.equals(value)) {
353 // System.err.println("Alias (" + alias + ") and value (" + value + ") differ!");
354 // }
355
356 String index = line.substring(alias_end + 1, value_start).trim();
357 // System.err.println("Index: " + index);
358
359 index_to_entry_mapping.put(index, new Entry(alias, value));
360 }
361
362 buffered_reader.close();
363 file_reader.close();
364 }
365 catch (Exception ex) {
366 System.err.println("Exception reading hfile " + file);
367 ex.printStackTrace();
368 }
369
370 Iterator index_keys = index_to_entry_mapping.keySet().iterator();
371 while (index_keys.hasNext()) {
372 String index = (String) index_keys.next();
373 String alias = ((Entry) index_to_entry_mapping.get(index)).alias;
374 String value = ((Entry) index_to_entry_mapping.get(index)).value;
375
376 // Chop the last reference off index, as we already have it
377 if (index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
378 index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
379 // Then while there are still futher indexes left, retrieve them
380 while (index.length() > 0) {
381 // Retrieve that value (if any).
382 Entry entry = (Entry) index_to_entry_mapping.get(index);
383 if (entry != null) {
384 // Precatenate with the current value separating with a pipe
385 value = entry.value + StaticStrings.PIPE_CHAR + value;
386 }
387 // Then trim the index down
388 if (index.indexOf(StaticStrings.STOP_CHARACTER) > -1) {
389 index = index.substring(0, index.lastIndexOf(StaticStrings.STOP_CHARACTER));
390 }
391 else {
392 index = "";
393 }
394 }
395 }
396
397 alias_to_value_mapping.put(alias, value);
398 }
399
400 index_to_entry_mapping.clear();
401 }
402
403
404 public String getFullValue(String alias)
405 {
406 return (String) alias_to_value_mapping.get(alias);
407 }
408
409
410 private class Entry
411 {
412 public String alias;
413 public String value;
414
415 public Entry(String alias, String value) {
416 this.alias = alias;
417 this.value = value;
418 }
419 }
420 }
421}
Note: See TracBrowser for help on using the repository browser.