source: trunk/gli/src/org/greenstone/gatherer/cdm/CollectionConfiguration.java@ 5589

Last change on this file since 5589 was 5290, checked in by mdewsnip, 21 years ago

Added code so "show_progress" argument is always present with RecPlug (needed for accurate import progress bar).

  • Property svn:keywords set to Author Date Id Revision
File size: 66.4 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.cdm;
28import java.awt.*;
29import java.awt.event.*;
30import java.io.*;
31import java.util.*;
32import javax.swing.*;
33import org.greenstone.gatherer.Gatherer;
34import org.greenstone.gatherer.cdm.CommandTokenizer;
35import org.greenstone.gatherer.msm.MSMUtils;
36import org.greenstone.gatherer.util.DOMTree;
37import org.greenstone.gatherer.util.Codec;
38import org.greenstone.gatherer.util.StaticStrings;
39import org.greenstone.gatherer.util.Utility;
40import org.w3c.dom.*;
41/** This class provides either access to a pseudo-G3 document, or parses a collect.cfg file in such a way as to provide an xml-type view of its content. This latter form is useful as it allows the manipulation and free form editing of a legacy collect.cfg file while still allowing the various CDM data managers to base themselves directly on this model (whereas they used to be independent ListModels which clobbered the ordering of unparsed commands).
42 * @author John Thompson, Greenstone Digital Library, University of Waikato
43 * @version 2.3d
44 */
45public class CollectionConfiguration
46 extends StaticStrings {
47
48 static public Document document;
49
50 static public void main(String[] args) {
51 if(args.length >= 1) {
52 File file = new File(args[0]);
53 CollectionConfiguration collect_cfg = new CollectionConfiguration(file);
54 collect_cfg.save(true);
55 collect_cfg.save(false);
56 collect_cfg = null;
57 }
58 else {
59 System.out.println("Usage: CollectionConfiguration <filename>");
60 }
61 }
62
63 /** Find the best insertion position for the given DOM Element. This should try to match command tag, and if found should then try to group by name or type (eg CollectionMeta), or append to end is no such grouping exists (eg PlugIns). Failing a command match it will check against the command order for the best insertion location.
64 * @param element the command Element to be inserted
65 * @return the Element which the given command should be inserted before, or null to append to end of list
66 */
67 static public Node findInsertionPoint(Element target_element) {
68 ///ystem.err.println("Find insertion point: " + target_element.getNodeName());
69 String target_element_name = target_element.getNodeName();
70 Element document_element = document.getDocumentElement();
71 // Try to find commands with the same tag.
72 NodeList matching_elements = document_element.getElementsByTagName(target_element_name);
73 // If we found matching elements, then we have our most likely insertion location, so check within for groupings
74 if(matching_elements.getLength() != 0) {
75 ///ystem.err.println("Found matching elements.");
76 // Only CollectionMeta are grouped.
77 if(target_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
78 ///ystem.err.println("Dealing with collection metadata");
79 // Special case: CollectionMeta can be added at either the start or end of a collection configuration file. However the start position is reserved for special metadata, so if no non-special metadata can be found we must append to the end.
80 // So if the command to be added is special add it immediately after any other special command
81 if(target_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
82 int index = 0;
83 Element matched_element = (Element) matching_elements.item(index);
84 Element sibling_element = (Element) matched_element.getNextSibling();
85 while(sibling_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
86 index++;
87 matched_element = (Element) matching_elements.item(index);
88 sibling_element = (Element) matched_element.getNextSibling();
89 }
90 if(sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
91 Element newline_element = document.createElement(NEWLINE_ELEMENT);
92 document_element.insertBefore(newline_element, sibling_element);
93 }
94 return sibling_element;
95 }
96 // Otherwise try to find a matching 'name' and add after the last one in that group.
97 else {
98 int index = 0;
99 target_element_name = target_element.getAttribute(NAME_ATTRIBUTE);
100 boolean found = false;
101 // Skip all of the special metadata
102 Element matched_element = (Element) matching_elements.item(index);
103 while(matched_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
104 index++;
105 matched_element = (Element) matching_elements.item(index);
106 }
107 // Begin search
108 while(!found && matched_element != null) {
109 if(matched_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
110 found = true;
111 }
112 else {
113 index++;
114 matched_element = (Element) matching_elements.item(index);
115 }
116 }
117 // If we found a match, we need to continue checking until we find the last name match.
118 if(found) {
119 index++;
120 Element previous_sibling = matched_element;
121 Element sibling_element = (Element) matching_elements.item(index);
122 while(sibling_element != null && sibling_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
123 previous_sibling = sibling_element;
124 index++;
125 sibling_element = (Element) matching_elements.item(index);
126 }
127 // Previous sibling now holds the command immediately before where we want to add, so find its next sibling and add to that. In this one case we can ignore new lines!
128 return previous_sibling.getNextSibling();
129 }
130 // If not found we just add after last metadata element
131 else {
132 Element last_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
133 return last_element.getNextSibling();
134 }
135 }
136
137 }
138 else {
139 ///ystem.err.println("Not dealing with collection meta.");
140 Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
141 // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
142 Node sibling_element = matched_element.getNextSibling();
143 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
144 Element newline_element = document.createElement(NEWLINE_ELEMENT);
145 document_element.insertBefore(newline_element, sibling_element);
146 }
147 return sibling_element; // Note that this may be null
148 }
149 }
150 ///ystem.err.println("No matching elements found.");
151 // Locate where this command is in the ordering
152 int command_index = -1;
153 for(int i = 0; command_index == -1 && i < COMMAND_ORDER.length; i++) {
154 if(COMMAND_ORDER[i].equals(target_element_name)) {
155 command_index = i;
156 }
157 }
158 ///ystem.err.println("Command index is: " + command_index);
159 // Now move forward, checking for existing elements in each of the preceeding command orders.
160 int preceeding_index = command_index - 1;
161 ///ystem.err.println("Searching before the target command.");
162 while(preceeding_index >= 0) {
163 matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[preceeding_index]);
164 // If we've found a match
165 if(matching_elements.getLength() > 0) {
166 // We add after the last element
167 Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
168 // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
169 Node sibling_element = matched_element.getNextSibling();
170 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
171 Element newline_element = document.createElement(NEWLINE_ELEMENT);
172 document_element.insertBefore(newline_element, sibling_element);
173 }
174 return sibling_element; // Note that this may be null
175 }
176 preceeding_index--;
177 }
178 // If all that fails, we now move backwards through the commands
179 int susceeding_index = command_index + 1;
180 ///ystem.err.println("Searching after the target command.");
181 while(susceeding_index < COMMAND_ORDER.length) {
182 matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[susceeding_index]);
183 // If we've found a match
184 if(matching_elements.getLength() > 0) {
185 // We add before the first element
186 Element matched_element = (Element) matching_elements.item(0);
187 // One final quick test. If the matched element is immediately preceeded by a NewLine command, then we insert another NewLine before the matched command, then return this new NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
188 Node sibling_element = matched_element.getPreviousSibling();
189 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
190 Element newline_element = document.createElement(NEWLINE_ELEMENT);
191 document_element.insertBefore(newline_element, sibling_element);
192 }
193 return sibling_element; // Note that this may be null
194 }
195 susceeding_index++;
196 }
197 // Well. Apparently there are no other commands in this collection configuration. So append away...
198 return null;
199 }
200
201 static public String toString(Element command_element, boolean show_extracted_namespace) {
202 String command_element_name = command_element.getNodeName();
203 if(command_element_name.equals(CLASSIFY_ELEMENT)) {
204 return self.classifyToString(command_element, show_extracted_namespace);
205 }
206 else if(command_element_name.equals(FORMAT_ELEMENT)) {
207 return self.formatToString(command_element, show_extracted_namespace);
208 }
209 else if(command_element_name.equals(INDEXES_ELEMENT)) {
210 return self.indexesToString(command_element, show_extracted_namespace);
211 }
212 else if(command_element_name.equals(INDEX_DEFAULT_ELEMENT)) {
213 return self.indexDefaultToString(command_element, show_extracted_namespace);
214 }
215 else if(command_element_name.equals(LANGUAGES_ELEMENT)) {
216 return self.languagesToString(command_element);
217 }
218 else if(command_element_name.equals(LANGUAGE_DEFAULT_ELEMENT)) {
219 return self.languageDefaultToString(command_element);
220 }
221 else if(command_element_name.equals(LEVELS_ELEMENT)) {
222 return self.levelsToString(command_element);
223 }
224 else if(command_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
225 return self.metadataToString(command_element, show_extracted_namespace);
226 }
227 else if(command_element_name.equals(COLLECTIONMETADATA_CREATOR_ELEMENT)) {
228 return self.metadataToString(command_element, show_extracted_namespace);
229 }
230 else if(command_element_name.equals(COLLECTIONMETADATA_MAINTAINER_ELEMENT)) {
231 return self.metadataToString(command_element, show_extracted_namespace);
232 }
233 else if(command_element_name.equals(COLLECTIONMETADATA_PUBLIC_ELEMENT)) {
234 return self.metadataToString(command_element, show_extracted_namespace);
235 }
236 else if(command_element_name.equals(COLLECTIONMETADATA_BETA_ELEMENT)) {
237 return self.metadataToString(command_element, show_extracted_namespace);
238 }
239 else if(command_element_name.equals(PLUGIN_ELEMENT)) {
240 return self.pluginToString(command_element, show_extracted_namespace);
241 }
242 else if(command_element_name.equals(SEARCHTYPE_ELEMENT)) {
243 return self.searchtypeToString(command_element);
244 }
245 else if(command_element_name.equals(SUBCOLLECTION_ELEMENT)) {
246 return self.subcollectionToString(command_element, show_extracted_namespace);
247 }
248 else if(command_element_name.equals(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) {
249 return self.subcollectionDefaultIndexToString(command_element);
250 }
251 else if(command_element_name.equals(SUBCOLLECTION_INDEXES_ELEMENT)) {
252 return self.subcollectionIndexesToString(command_element);
253 }
254 else if(command_element_name.equals(SUPERCOLLECTION_ELEMENT)) {
255 return self.supercollectionToString(command_element);
256 }
257 else if(command_element_name.equals(UNKNOWN_ELEMENT)) {
258 return self.unknownToString(command_element);
259 }
260 return "";
261 }
262
263 /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix.
264 * @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string
265 * @return a HashMap containing the arguments parsed
266 */
267 static public HashMap parseArguments(CommandTokenizer tokenizer) {
268 HashMap arguments = new HashMap();
269 String name = null;
270 String value = null;
271 while(tokenizer.hasMoreTokens() || name != null) {
272 // First we retrieve a name if we need one.
273 if(name == null) {
274 name = tokenizer.nextToken();
275 }
276 // Now we attempt to retrieve a value
277 if(tokenizer.hasMoreTokens()) {
278 value = tokenizer.nextToken();
279 // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop.
280 if(value.startsWith(StaticStrings.MINUS_CHARACTER)) {
281 arguments.put(name, null);
282 name = value;
283 }
284 // Otherwise we have a typical name->value pair ready to go
285 else {
286 arguments.put(name, value);
287 name = null;
288 }
289 }
290 // Otherwise its a binary flag
291 else {
292 arguments.put(name, null);
293 name = null;
294 }
295 }
296 return arguments;
297 }
298
    /** Working list which save(boolean) creates before writing a text configuration and resets to null once the write completes. */
    static private ArrayList known_metadata;

    /** The most recently constructed CollectionConfiguration, through which the static toString method reaches the instance formatting methods. */
    static private CollectionConfiguration self;

    /** The extracted metadata namespace followed by the namespace separator. The formatting methods strip this from values when the extracted namespace is not to be shown. */
    static final private String EXTRACTED_PREFIX = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP;
    /** Gives the preferred ordering of commands */
    static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, StaticStrings.COLLECTIONMETADATA_BETA_ELEMENT, StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};
306
307 /** ************************** Public Data Members ***************************/
308
309 /** ************************** Private Data Members ***************************/
310
    /** The configuration file handed to the constructor, which save(boolean) writes back to when producing a text configuration. */
    private File collect_config_file;
312
313 /** ************************** Public Methods ***************************/
314
315 public CollectionConfiguration(File collect_config_file) {
316 this.self = this;
317 this.collect_config_file = collect_config_file;
318 // If collect_cfg is xml we can load it straight away
319 String collect_config_name = collect_config_file.getName();
320 if(collect_config_name.equals(COLLECTCONFIGURATION_XML)) {
321 // Parse with Utility but don't use class loader
322 document = Utility.parse(collect_config_file, false);
323 }
324 // Otherwise if this is a legacy collect.cfg file then read in the template and send to magic parser
325 else if(collect_config_name.equals(COLLECT_CFG)) {
326 document = Utility.parse(PSEUDO_COLLECTCONFIGURATION_XML, true);
327 parse(collect_config_file);
328 }
329 }
330
331 /** This debug facility shows the currently loaded collect.cfg or CollectConfig.xml file as a DOM tree. */
332 public void display() {
333 JDialog dialog = new JDialog(Gatherer.g_man, "Collection Configuration", false);
334 dialog.setSize(400,400);
335 JPanel content_pane = (JPanel) dialog.getContentPane();
336 final DOMTree tree = new DOMTree(document);
337 JButton refresh_button = new JButton("Refresh Tree");
338 refresh_button.addActionListener(new ActionListener() {
339 public void actionPerformed(ActionEvent event) {
340 tree.setDocument(document);
341 }
342 });
343 content_pane.setBorder(BorderFactory.createEmptyBorder(5,5,5,5));
344 content_pane.setLayout(new BorderLayout());
345 content_pane.add(new JScrollPane(tree), BorderLayout.CENTER);
346 content_pane.add(refresh_button, BorderLayout.SOUTH);
347 dialog.show();
348 }
349
350 public Element getBeta() {
351 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_BETA_ELEMENT, null, null);
352 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_BETA_STR);
353 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
354 return element;
355 }
356
357 public Element getCreator() {
358 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_CREATOR_ELEMENT, null, null);
359 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_CREATOR_STR);
360 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
361 return element;
362 }
363
364 public Element getDocumentElement() {
365 return document.getDocumentElement();
366 }
367
368 public File getFile() {
369 return collect_config_file;
370 }
371
372 /** Retrieve or create the languages Element. */
373 public Element getLanguages() {
374 return getOrCreateElementByTagName(LANGUAGES_ELEMENT, null, null);
375 }
376
377 public Element getLevels() {
378 return getOrCreateElementByTagName(LEVELS_ELEMENT, null, null);
379 }
380
381 public Element getMaintainer() {
382 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_MAINTAINER_ELEMENT, null, null);
383 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_MAINTAINER_STR);
384 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
385 return element;
386 }
387
388 /** Retrieve or create the indexes Element. Note that this method behaves differently from the other getBlah methods, in that it also has to keep in mind that indexes come in two flavours, MG and MGPP. */
389 public Element getMGIndexes() {
390 return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, FALSE_STR);
391 }
392
393 public Element getMGPPIndexes() {
394 return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, TRUE_STR);
395 }
396
397 public Element getPublic() {
398 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_PUBLIC_ELEMENT, null, null);
399 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_PUBLIC_STR);
400 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
401 return element;
402 }
403
404 /** Retrieve or create the searchtype element. */
405 public Element getSearchType() {
406 ///ystem.err.println("Get or create element by tag name: " + name);
407 Element document_element = document.getDocumentElement();
408 NodeList elements = document_element.getElementsByTagName(SEARCHTYPE_ELEMENT);
409 int elements_length = elements.getLength();
410 if(elements_length > 0) {
411 document_element = null;
412 return (Element) elements.item(0);
413 }
414 // Create the element
415 Element element = document.createElement(SEARCHTYPE_ELEMENT);
416 Node target_node = findInsertionPoint(element);
417 if(target_node != null) {
418 document_element.insertBefore(element, target_node);
419 }
420 else {
421 document_element.appendChild(element);
422 }
423 document_element = null;
424 // Append a default search type node - form
425 Element a_searchtype_element = CollectionDesignManager.collect_config.document.createElement(CollectionConfiguration.CONTENT_ELEMENT);
426 a_searchtype_element.setAttribute(CollectionConfiguration.NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]);
427 element.appendChild(a_searchtype_element);
428 return element;
429 }
430
431 /** Retrieve or create the subindexes Element. */
432 public Element getSubIndexes() {
433 return getOrCreateElementByTagName(SUBCOLLECTION_INDEXES_ELEMENT, null, null);
434 }
435
436 /** Retrieve or create the supercollections Element. */
437 public Element getSuperCollection() {
438 return getOrCreateElementByTagName(SUPERCOLLECTION_ELEMENT, null, null);
439 }
440
441 public void save() {
442 save(false);
443 }
444
445 public void save(boolean force_xml) {
446 if(collect_config_file.exists()) {
447 File original_file = new File(collect_config_file.getParentFile(), COLLECT_CFG);
448 File backup_file = new File(collect_config_file.getParentFile(), "collect.bak");
449 if(backup_file.exists()) {
450 backup_file.delete();
451 }
452 if(!original_file.renameTo(backup_file)) {
453 Gatherer.println("Can't rename collect.cfg");
454 }
455 }
456 if(force_xml || collect_config_file.getName().equals(COLLECTCONFIGURATION_XML)) {
457 ///ystem.err.println("Writing XML");
458 Utility.export(document, new File(collect_config_file.getParentFile(), COLLECTCONFIGURATION_XML));
459 }
460 else {
461 ///ystem.err.println("Writing text");
462 try {
463 FileWriter file_writer = new FileWriter(collect_config_file, false);
464 BufferedWriter buffered_writer = new BufferedWriter(file_writer);
465 // In order to write out an old style collect.cfg we have to traverse the model and do several 'cute' tricks to ensure the collect.cfg is valid (for instance while every metadata element has a language attribute, only second or subsequent metadata, for a certain name, needs a language argument - hence the known metadata array. Note that within GLI the language will always be shown, but it doesn't crash and burn like G2 does, te-he).
466 known_metadata = new ArrayList();
467 Element collect_config_element = document.getDocumentElement();
468 NodeList command_elements = collect_config_element.getChildNodes();
469 boolean just_wrote_newline = false; // Prevent two or more newlines in a row
470 for(int i = 0; i < command_elements.getLength(); i++) {
471 Node command_node = command_elements.item(i);
472 if(command_node instanceof Element) {
473 Element command_element = (Element) command_node;
474 // The only thing left are NewLine elements
475 if(command_element.getNodeName().equals(NEWLINE_ELEMENT) && !just_wrote_newline) {
476 buffered_writer.newLine();
477 just_wrote_newline = true;
478 }
479 // Anything else we write to file, but only if it has been assigned, the exception being the Indexes element which just get commented if unassigned (a side effect of MG && MGPP compatibility)
480 else if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR) || command_element.getNodeName().equals(INDEXES_ELEMENT) || command_element.getNodeName().equals(INDEX_DEFAULT_ELEMENT) || command_element.getNodeName().equals(LEVELS_ELEMENT)){
481 String command = toString(command_element, false);
482 if(command != null && command.length() > 0) {
483 write(buffered_writer, command);
484 buffered_writer.newLine();
485 just_wrote_newline = false;
486 }
487 }
488 }
489 }
490 buffered_writer.close();
491 known_metadata = null;
492 }
493 catch (Exception exception) {
494 Gatherer.println("Error in CollectionConfiguration.save(boolean): " + exception);
495 Gatherer.printStackTrace(exception);
496 }
497 }
498 }
499
500 /** ************************** Private Methods ***************************/
501
502 private String classifyToString(Element command_element, boolean show_extracted_namespace) {
503 StringBuffer text = new StringBuffer(CLASSIFY_STR);
504 text.append(TAB_CHARACTER);
505 text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
506 text.append(SPACE_CHARACTER);
507 NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
508 int option_elements_length = option_elements.getLength();
509 for(int j = 0; j < option_elements_length; j++) {
510 Element option_element = (Element) option_elements.item(j);
511 if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
512 text.append(StaticStrings.MINUS_CHARACTER);
513 text.append(option_element.getAttribute(NAME_ATTRIBUTE));
514 String value_str = MSMUtils.getValue(option_element);
515 if(value_str.length() > 0) {
516 // If the value happens to be the identifier of an extracted metadata element, then remove the namespace.
517 if(!show_extracted_namespace && value_str.startsWith(EXTRACTED_PREFIX)) {
518 value_str = value_str.substring(EXTRACTED_PREFIX.length());
519 }
520 text.append(SPACE_CHARACTER);
521 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
522 text.append(value_str);
523 }
524 else {
525 text.append(SPEECH_CHARACTER);
526 text.append(value_str);
527 text.append(SPEECH_CHARACTER);
528 }
529 }
530 value_str = null;
531 if(j < option_elements_length - 1) {
532 text.append(SPACE_CHARACTER);
533 }
534 }
535 option_element = null;
536 }
537 option_elements = null;
538 return text.toString();
539 }
540
541 private String formatToString(Element command_element, boolean show_extracted_namespace) {
542 StringBuffer text = new StringBuffer(FORMAT_STR);
543 text.append(SPACE_CHARACTER);
544 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
545 text.append(SPACE_CHARACTER);
546 String value_str = command_element.getAttribute(VALUE_ATTRIBUTE);
547 if(value_str.length() != 0) {
548 text.append(value_str);
549 }
550 else {
551 value_str = MSMUtils.getValue(command_element);
552 // Remove any references to a namespace for extracted metadata
553 if(!show_extracted_namespace) {
554 value_str.replaceAll(EXTRACTED_PREFIX, "");
555 }
556 text.append(SPEECH_CHARACTER);
557 text.append(value_str);
558 text.append(SPEECH_CHARACTER);
559 }
560 value_str = null;
561 return text.toString();
562 }
563
564 /** Retrieve or create the indexes Element. */
565 private Element getOrCreateElementByTagName(String name, String conditional_attribute, String required_value) {
566 ///ystem.err.println("Get or create element by tag name: " + name);
567 Element document_element = document.getDocumentElement();
568 NodeList elements = document_element.getElementsByTagName(name);
569 int elements_length = elements.getLength();
570 if(elements_length > 0) {
571 if(conditional_attribute == null) {
572 document_element = null;
573 return (Element) elements.item(0);
574 }
575 else {
576 for(int i = 0; i < elements_length; i++) {
577 Element element = (Element) elements.item(i);
578 if(element.getAttribute(conditional_attribute).equals(required_value)) {
579 document_element = null;
580 return element;
581 }
582 element = null;
583 }
584 }
585 }
586 // Create the element
587 Element element = document.createElement(name);
588 // If there was a property set it
589 if(conditional_attribute != null) {
590 element.setAttribute(conditional_attribute, required_value);
591 }
592 Node target_node = findInsertionPoint(element);
593 if(target_node != null) {
594 document_element.insertBefore(element, target_node);
595 }
596 else {
597 document_element.appendChild(element);
598 }
599 document_element = null;
600 return element;
601 }
602
603 private String indexesToString(Element command_element, boolean show_extracted_namespace) {
604 boolean comment_only = false;
605 StringBuffer text = new StringBuffer("");
606 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
607 text.append("#");
608 comment_only = true;
609 }
610 text.append(INDEX_STR);
611 text.append(TAB_CHARACTER);
612 if(!comment_only) {
613 text.append(TAB_CHARACTER);
614 }
615 NodeList index_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
616 // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
617 int index_elements_length = index_elements.getLength();
618 for(int j = 0; j < index_elements_length; j++) {
619 Element index_element = (Element) index_elements.item(j);
620 String level_str = index_element.getAttribute(LEVEL_ATTRIBUTE);
621 if(level_str.length() > 0) {
622 text.append(level_str);
623 text.append(StaticStrings.COLON_CHARACTER);
624 }
625 NodeList content_elements = index_element.getElementsByTagName(CONTENT_ELEMENT);
626 int content_elements_length = content_elements.getLength();
627 // Don't output anything if no indexes are set
628 if(content_elements_length == 0) {
629 return null;
630 }
631 for(int k = 0; k < content_elements_length; k++) {
632 Element content_element = (Element) content_elements.item(k);
633 String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
634 if(!show_extracted_namespace && name_str.startsWith(EXTRACTED_PREFIX)) {
635 name_str = name_str.substring(EXTRACTED_PREFIX.length());
636 }
637 text.append(name_str);
638 name_str = null;
639 if(k < content_elements_length - 1) {
640 text.append(StaticStrings.COMMA_CHARACTER);
641 }
642 content_element = null;
643 }
644 if(j < index_elements_length - 1) {
645 text.append(SPACE_CHARACTER);
646 }
647 content_elements = null;
648 index_element = null;
649 }
650 index_elements = null;
651 return text.toString();
652 }
653
654 private String indexDefaultToString(Element command_element, boolean show_extracted_namespace) {
655 StringBuffer text = new StringBuffer("");
656 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
657 text.append("#");
658 }
659 text.append(INDEX_DEFAULT_STR);
660 text.append(TAB_CHARACTER);
661 text.append(command_element.getAttribute(LEVEL_ATTRIBUTE));
662 text.append(StaticStrings.COLON_CHARACTER);
663 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
664 int content_elements_length = content_elements.getLength();
665 for(int j = 0; j < content_elements_length; j++) {
666 Element content_element = (Element) content_elements.item(j);
667 String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
668 if(!show_extracted_namespace && name_str.startsWith(EXTRACTED_PREFIX)) {
669 name_str = name_str.substring(EXTRACTED_PREFIX.length());
670 }
671 text.append(name_str);
672 name_str = null;
673 if(j < content_elements_length - 1) {
674 text.append(StaticStrings.COMMA_CHARACTER);
675 }
676 content_element = null;
677 }
678 content_elements = null;
679 return text.toString();
680 }
681
682 private String languagesToString(Element command_element) {
683 StringBuffer text = new StringBuffer(LANGUAGES_STR);
684 text.append(TAB_CHARACTER);
685 // Retrieve all the languages and write them out in a space separated list
686 NodeList language_elements = command_element.getElementsByTagName(LANGUAGE_ELEMENT);
687 int language_elements_length = language_elements.getLength();
688 if(language_elements_length == 0) {
689 return null;
690 }
691 for(int j = 0; j < language_elements_length; j++) {
692 Element language_element = (Element) language_elements.item(j);
693 text.append(language_element.getAttribute(NAME_ATTRIBUTE));
694 if(j < language_elements_length - 1) {
695 text.append(SPACE_CHARACTER);
696 }
697 }
698 return text.toString();
699 }
700
701 private String languageDefaultToString(Element command_element) {
702 StringBuffer text = new StringBuffer(LANGUAGE_DEFAULT_STR);
703 text.append(TAB_CHARACTER);
704 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
705 return text.toString();
706 }
707
708 private String levelsToString(Element command_element) {
709 StringBuffer text = new StringBuffer("");
710 if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
711 text.append("#");
712 }
713 text.append(LEVELS_STR);
714 text.append(TAB_CHARACTER);
715 text.append(TAB_CHARACTER);
716 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
717 int content_elements_length = content_elements.getLength();
718 // Don't output anything if no levels are set.
719 if(content_elements_length == 0) {
720 return null;
721 }
722 for(int i = 0; i < content_elements_length; i++) {
723 Element content_element = (Element) content_elements.item(i);
724 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
725 text.append(SPACE_CHARACTER);
726 }
727 return text.substring(0, text.length() - 1);
728 }
729
730 static public String metadataToString(Element command_element, boolean text_value) {
731 // If there is no value attribute, then we don't write anything
732 String value_str = MSMUtils.getValue(command_element);
733 if(value_str == null || value_str.length() == 0) {
734 return "";
735 }
736 else {
737 StringBuffer text = new StringBuffer("");
738 String name_str = command_element.getAttribute(NAME_ATTRIBUTE);
739 // If the name is one of the special four, we don't write the collectionmeta first. Note the maintainer collectionmeta is singled out for 'prittying' reasons.
740 if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
741 text.append(name_str);
742 text.append(TAB_CHARACTER);
743 }
744 else if(name_str.equals(COLLECTIONMETADATA_BETA_STR) || name_str.equals(COLLECTIONMETADATA_CREATOR_STR) || name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
745 text.append(name_str);
746 text.append(TAB_CHARACTER);
747 text.append(TAB_CHARACTER);
748 }
749 else {
750 text.append(COLLECTIONMETADATA_STR);
751 text.append(TAB_CHARACTER);
752 text.append(name_str);
753 text.append(SPACE_CHARACTER);
754 String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE);
755 // If this is element is in english, and it is the first one found, we don't need to write the language argument.
756 if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) {
757 text.append(LBRACKET_CHARACTER);
758 text.append(LANGUAGE_ARGUMENT);
759 text.append(language_str);
760 text.append(RBRACKET_CHARACTER);
761 text.append(SPACE_CHARACTER);
762 }
763 if(known_metadata != null) {
764 known_metadata.add(name_str);
765 }
766 language_str = null;
767 }
768 name_str = null;
769
770 // The value string we retrieved will be encoded for xml, so we now decode it - to text if text_value set. This parameter was originally show_extracted_namespace, but sincethis is only true for 'toString()' commands from within the CDM, its good enough to determine if this toString() will be used to display on screen, or write to collect.cfg
771 if(text_value == CollectionMeta.TEXT) {
772 value_str = Codec.transform(value_str, Codec.DOM_TO_TEXT);
773 }
774 else {
775 value_str = Codec.transform(value_str, Codec.DOM_TO_GREENSTONE);
776 }
777
778 // We don't wrap the email addresses in quotes, nor any string without spaces
779 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
780 text.append(value_str);
781 }
782 else {
783 text.append(SPEECH_CHARACTER);
784 text.append(value_str);
785 text.append(SPEECH_CHARACTER);
786 }
787 value_str = null;
788 return text.toString();
789 }
790 }
791
792 /** Parse a collect.cfg into a DOM model representation. */
793 private void parse(File collect_config_file) {
794 try {
795 Element collect_cfg_element = document.getDocumentElement();
796 // Read in the file command at a time.
797 FileReader in_reader = new FileReader(collect_config_file);
798 BufferedReader in = new BufferedReader(in_reader);
799 String command_str = null;
800 while((command_str = in.readLine()) != null) {
801 Element command_element = null;
802 // A command may be broken over several lines.
803 command_str = command_str.trim();
804 boolean eof = false;
805 while(!eof && command_str.endsWith(NEWLINE_CHARACTER)) {
806 String next_line = in.readLine();
807 if(next_line != null) {
808 next_line = next_line.trim();
809 if(next_line.length() > 0) {
810 // Remove the new line character
811 command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
812 // And append the next line, which due to the test above must be non-zero length
813 command_str = command_str + next_line;
814 }
815 next_line = null;
816 }
817 // If we've reached the end of the file theres nothing more we can do
818 else {
819 eof = true;
820 }
821 }
822 // If there is still a new line character, then we remove it and hope for the best
823 if(command_str.endsWith(NEWLINE_CHARACTER)) {
824 command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
825 }
826 // Now we've either got a command to parse...
827 if(command_str.length() != 0) {
828 // Start trying to figure out what it is
829 StringTokenizer tokenizer = new StringTokenizer(command_str);
830 String command_type = tokenizer.nextToken().toLowerCase();
831 tokenizer = null;
832 // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
833 if(command_element == null && command_type.equals(CLASSIFY_STR)) {
834 command_element = parseClassify(command_str);
835 }
836 if(command_element == null && command_type.equals(FORMAT_STR)) {
837 command_element = parseFormat(command_str);
838 }
839 if(command_element == null && (command_type.equals(INDEX_STR) || command_type.equals(COMMENTED_INDEXES_STR))) {
840 command_element = parseIndex(command_str);
841 }
842 if(command_element == null && (command_type.equals(INDEX_DEFAULT_STR) || command_type.equals(COMMENTED_INDEX_DEFAULT_STR))) {
843 command_element = parseIndexDefault(command_str);
844 }
845 if(command_element == null && command_type.equals(LANGUAGES_STR)) {
846 command_element = parseLanguage(command_str);
847 }
848 if(command_element == null && command_type.equals(LANGUAGE_DEFAULT_STR)) {
849 command_element = parseLanguageDefault(command_str);
850 }
851 if(command_element == null && (command_type.equals(LEVELS_STR) || command_type.equals(COMMENTED_LEVELS_STR))) {
852 command_element = parseLevels(command_str);
853 }
854 if(command_element == null && command_type.equals(COLLECTIONMETADATA_STR)) {
855 command_element = parseMetadata(command_str);
856 }
857 if(command_element == null && (command_type.equals(COLLECTIONMETADATA_BETA_STR) || command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) {
858 command_element = parseMetadataSpecial(command_str);
859 }
860 if(command_element == null && command_type.equals(PLUGIN_STR)) {
861 command_element = parsePlugIn(command_str);
862 }
863 if(command_element == null && command_type.equals(SEARCHTYPE_STR)) {
864 command_element = parseSearchType(command_str);
865 }
866 if(command_element == null && command_type.equals(SUBCOLLECTION_STR)) {
867 command_element = parseSubCollection(command_str);
868 }
869 if(command_element == null && command_type.equals(SUBCOLLECTION_DEFAULT_INDEX_STR)) {
870 command_element = parseSubCollectionDefaultIndex(command_str);
871 }
872 if(command_element == null && command_type.equals(SUBCOLLECTION_INDEX_STR)) {
873 command_element = parseSubCollectionIndex(command_str);
874 }
875 if(command_element == null && (command_type.equals(SUPERCOLLECTION_STR) || command_type.equals(CCS_STR))) {
876 command_element = parseSuperCollection(command_str);
877 }
878 // Doesn't match any known type
879 command_type = null;
880 if(command_element == null) {
881 // No-one knows what to do with this command, so we create an Unknown command element
882 command_element = document.createElement(UNKNOWN_ELEMENT);
883 MSMUtils.setValue(command_element, command_str);
884 }
885 }
886 // Or an empty line to remember for later
887 else {
888 command_element = document.createElement(NEWLINE_ELEMENT);
889 }
890 // Now command element shouldn't be null so we append it to the collection config DOM
891 collect_cfg_element.appendChild(command_element);
892 }
893 }
894 catch(Exception exception) {
895 Gatherer.println("Error in CollectionConfiguration.parse(java.io.File): " + exception);
896 Gatherer.printStackTrace(exception);
897 }
898 }
899
900 private Element parseClassify(String command_str) {
901 Element command_element = null;
902 try {
903 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
904 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
905 if(tokenizer.countTokens() >= 4) {
906 command_element = document.createElement(CLASSIFY_ELEMENT);
907 // First token is classify
908 tokenizer.nextToken();
909 // The next token is the classifier type
910 command_element.setAttribute(TYPE_ATTRIBUTE, tokenizer.nextToken());
911 // Now we parse out the remaining arguments into a hashmapping from name to value
912 HashMap arguments = parseArguments(tokenizer);
913 // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
914 Iterator names = arguments.keySet().iterator();
915 while(names.hasNext()) {
916 String name = (String) names.next();
917 String value = (String) arguments.get(name); // Can be null
918 // The metadata argument gets added as the content attribute
919 if(name.equals(METADATA_ARGUMENT) && value != null) {
920 // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace.
921 if(value.indexOf(MSMUtils.NS_SEP) == -1) {
922 value = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + value;
923 }
924 //command_element.setAttribute(CONTENT_ATTRIBUTE, value);
925 }
926 // Everything else is an Option Element
927 Element option_element = document.createElement(OPTION_ELEMENT);
928 option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
929 if(value != null) {
930 // Remove any speech marks appended in strings containing whitespace
931 if(value.startsWith(SPEECH_CHARACTER) && value.endsWith(SPEECH_CHARACTER)) {
932 value = value.substring(1, value.length() - 1);
933 }
934 MSMUtils.setValue(option_element, value);
935 }
936 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
937 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
938 command_element.appendChild(option_element);
939 option_element = null;
940 name = null;
941 value = null;
942 }
943 names = null;
944 arguments = null;
945 }
946 tokenizer = null;
947 }
948 catch(Exception error) {
949 }
950 return command_element;
951 }
952
953 private Element parseFormat(String command_str) {
954 Element command_element = null;
955 try {
956 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
957 if(tokenizer.countTokens() >= 3) {
958 command_element = document.createElement(FORMAT_ELEMENT);
959 // First token is format
960 tokenizer.nextToken();
961 command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
962 String format_value = tokenizer.nextToken();
963 // If the value is true or false we add it as an attribute
964 if(format_value.equalsIgnoreCase(TRUE_STR) || format_value.equalsIgnoreCase(FALSE_STR)) {
965 command_element.setAttribute(VALUE_ATTRIBUTE, format_value.toLowerCase());
966 }
967 // Otherwise it gets added as a text node
968 else {
969 // Strip any speech marks
970 if(format_value.startsWith(SPEECH_CHARACTER) && format_value.endsWith(SPEECH_CHARACTER)) {
971 format_value = format_value.substring(1, format_value.length() - 1);
972 }
973 MSMUtils.setValue(command_element, format_value);
974 }
975 format_value = null;
976 }
977 tokenizer = null;
978 }
979 catch(Exception exception) {
980 }
981 return command_element;
982 }
983
984 private Element parseIndex(String command_str) {
985 Element command_element = null;
986 try {
987 StringTokenizer tokenizer = new StringTokenizer(command_str);
988 String command = tokenizer.nextToken();
989 if(tokenizer.hasMoreTokens()) {
990 command_element = document.createElement(INDEXES_ELEMENT);
991 }
992 command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(INDEX_STR) ? TRUE_STR : FALSE_STR));
993 command = null;
994 while(tokenizer.hasMoreTokens()) {
995 Element index_element = document.createElement(INDEX_ELEMENT);
996 String index_str = tokenizer.nextToken();
997 // There are two types of index we have to consider. Old G2.38 and earlier use level:source tuplets while G2.39+ have just a single, non-comma separated list where order is important.
998 boolean old_index;
999 if(index_str.indexOf(COLON_CHARACTER) != -1) {
1000 old_index = true;
1001 index_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
1002 index_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
1003 command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR);
1004 }
1005 else {
1006 command_element.setAttribute(MGPP_ATTRIBUTE, TRUE_STR);
1007 old_index = false;
1008 }
1009 StringTokenizer content_tokenizer = new StringTokenizer(index_str, StaticStrings.COMMA_CHARACTER);
1010 while(content_tokenizer.hasMoreTokens()) {
1011 Element content_element = document.createElement(CONTENT_ELEMENT);
1012 String content_str = content_tokenizer.nextToken();
1013 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1014 if(content_str.indexOf(MSMUtils.NS_SEP) == -1) {
1015 if(content_str.equals(StaticStrings.TEXT_STR) || (!old_index && content_str.equals(StaticStrings.ALLFIELDS_STR))) {
1016 // Our special strings are OK.
1017 }
1018 else {
1019 content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str;
1020 }
1021 }
1022 content_element.setAttribute(NAME_ATTRIBUTE, content_str);
1023 index_element.appendChild(content_element);
1024 content_element = null;
1025 }
1026 content_tokenizer = null;
1027 index_str = null;
1028 command_element.appendChild(index_element);
1029 index_element = null;
1030 }
1031 tokenizer = null;
1032 }
1033 catch (Exception exception) {
1034 exception.printStackTrace();
1035 }
1036 return command_element;
1037 }
1038
1039 private Element parseIndexDefault(String command_str) {
1040 Element command_element = null;
1041 try {
1042 StringTokenizer tokenizer = new StringTokenizer(command_str);
1043 if(tokenizer.countTokens() >= 2) {
1044 command_element = document.createElement(INDEX_DEFAULT_ELEMENT);
1045 command_element.setAttribute(ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken()).equals(INDEX_DEFAULT_STR) ? TRUE_STR : FALSE_STR));
1046 String index_str = tokenizer.nextToken();
1047 command_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
1048 String content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
1049 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1050 while(content_tokenizer.hasMoreTokens()) {
1051 Element content_element = document.createElement(CONTENT_ELEMENT);
1052 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1053 command_element.appendChild(content_element);
1054 content_element = null;
1055 }
1056 content_tokenizer = null;
1057 content_str = null;
1058 content_str = null;
1059 index_str = null;
1060 }
1061 tokenizer = null;
1062 }
1063 catch (Exception exception) {
1064 }
1065 return command_element;
1066 }
1067
1068 private Element parseLanguage(String command_str) {
1069 Element command_element = null;
1070 try {
1071 StringTokenizer tokenizer = new StringTokenizer(command_str);
1072 tokenizer.nextToken();
1073 if(tokenizer.hasMoreTokens()) {
1074 command_element = document.createElement(LANGUAGES_ELEMENT);
1075 while(tokenizer.hasMoreTokens()) {
1076 Element language_element = document.createElement(LANGUAGE_ELEMENT);
1077 language_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1078 command_element.appendChild(language_element);
1079 language_element = null;
1080 }
1081 }
1082 tokenizer = null;
1083 }
1084 catch (Exception exception) {
1085 }
1086 return command_element;
1087 }
1088
1089 private Element parseLanguageDefault(String command_str) {
1090 Element command_element = null;
1091 try {
1092 StringTokenizer tokenizer = new StringTokenizer(command_str);
1093 if(tokenizer.countTokens() >= 2) {
1094 command_element = document.createElement(LANGUAGE_DEFAULT_ELEMENT);
1095 tokenizer.nextToken();
1096 String default_language_str = tokenizer.nextToken();
1097 command_element.setAttribute(NAME_ATTRIBUTE, default_language_str);
1098 default_language_str = null;
1099 }
1100 tokenizer = null;
1101 }
1102 catch (Exception exception) {
1103 }
1104 return command_element;
1105 }
1106
1107 private Element parseLevels(String command_str) {
1108 Element command_element = null;
1109 try {
1110 StringTokenizer tokenizer = new StringTokenizer(command_str);
1111 // First token is command type (levels)
1112 String command = tokenizer.nextToken();
1113 if(tokenizer.hasMoreTokens()) {
1114 command_element = document.createElement(LEVELS_ELEMENT);
1115 command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(LEVELS_STR) ? TRUE_STR : FALSE_STR));
1116 while(tokenizer.hasMoreTokens()) {
1117 Element level_element = document.createElement(CONTENT_ELEMENT);
1118 level_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1119 command_element.appendChild(level_element);
1120 level_element = null;
1121 }
1122 }
1123 command = null;
1124 }
1125 catch(Exception exception) {
1126 }
1127 return command_element;
1128 }
1129
1130 private Element parseMetadata(String command_str) {
1131 Element command_element = null;
1132 try {
1133 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1134 if(tokenizer.countTokens() >= 3) {
1135 command_element = document.createElement(COLLECTIONMETADATA_ELEMENT);
1136 // First token is command type
1137 tokenizer.nextToken();
1138 String name_str = tokenizer.nextToken();
1139 String value_str = tokenizer.nextToken();
1140 String language_str = "en"; // By default
1141 // Check if the value string is actually a language string
1142 if(value_str.startsWith(LBRACKET_CHARACTER) && value_str.endsWith(RBRACKET_CHARACTER)) {
1143 language_str = value_str.substring(value_str.indexOf(LANGUAGE_ARGUMENT) + 2, value_str.length() - 1);
1144 value_str = tokenizer.nextToken();
1145 }
1146 // Remove any speech marks from value
1147 if(value_str.startsWith(SPEECH_CHARACTER) && value_str.endsWith(SPEECH_CHARACTER)) {
1148 value_str = value_str.substring(1, value_str.length() - 1);
1149 }
1150 if(value_str != null) {
1151 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
1152 value_str = Codec.transform(value_str, Codec.GREENSTONE_TO_DOM);
1153 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1154 command_element.setAttribute(LANGUAGE_ATTRIBUTE, language_str);
1155 MSMUtils.setValue(command_element, value_str);
1156 }
1157 else {
1158 command_element = null;
1159 }
1160 language_str = null;
1161 value_str = null;
1162 name_str = null;
1163 }
1164 tokenizer = null;
1165 }
1166 catch (Exception exception) {
1167 }
1168 return command_element;
1169 }
1170
1171 private Element parseMetadataSpecial(String command_str) {
1172 Element command_element = null;
1173 try {
1174 StringTokenizer tokenizer = new StringTokenizer(command_str);
1175 if(tokenizer.countTokens() >= 2) {
1176 String name_str = tokenizer.nextToken();
1177 String value_str = tokenizer.nextToken();
1178 if(name_str.equals(COLLECTIONMETADATA_BETA_STR)) {
1179 command_element = document.createElement(COLLECTIONMETADATA_BETA_ELEMENT);
1180 }
1181 else if(name_str.equals(COLLECTIONMETADATA_CREATOR_STR)) {
1182 command_element = document.createElement(COLLECTIONMETADATA_CREATOR_ELEMENT);
1183 }
1184 else if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
1185 command_element = document.createElement(COLLECTIONMETADATA_MAINTAINER_ELEMENT);
1186 }
1187 else if(name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
1188 command_element = document.createElement(COLLECTIONMETADATA_PUBLIC_ELEMENT);
1189 }
1190 if(command_element != null) {
1191 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1192 command_element.setAttribute(LANGUAGE_ATTRIBUTE, ENGLISH_LANGUAGE_STR);
1193 command_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
1194 if(value_str.startsWith(SPEECH_CHARACTER) && value_str.endsWith(SPEECH_CHARACTER)) {
1195 value_str = value_str.substring(1, value_str.length() - 1);
1196 }
1197 MSMUtils.setValue(command_element, value_str);
1198 }
1199 value_str = null;
1200 name_str = null;
1201 }
1202 tokenizer = null;
1203 }
1204 catch (Exception exception) {
1205 }
1206 return command_element;
1207 }
1208
1209 private Element parsePlugIn(String command_str) {
1210 Element command_element = null;
1211 boolean use_metadata_files = false;
1212 try {
1213 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1214 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1215 if(tokenizer.countTokens() >= 2) {
1216 command_element = document.createElement(PLUGIN_ELEMENT);
1217 // First token is plugin
1218 tokenizer.nextToken();
1219 // The next token is the type
1220 String type = tokenizer.nextToken();
1221 command_element.setAttribute(TYPE_ATTRIBUTE, type);
1222 // Now we parse out the remaining arguments into a hashmapping from name to value
1223 HashMap arguments = parseArguments(tokenizer);
1224 // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
1225 Iterator names = arguments.keySet().iterator();
1226 while(names.hasNext()) {
1227 String name = (String) names.next();
1228 String value = (String) arguments.get(name); // Can be null
1229 Element option_element = document.createElement(OPTION_ELEMENT);
1230 if(name.substring(1).equals(USE_METADATA_FILES_ARGUMENT)) {
1231 use_metadata_files = true;
1232 }
1233 option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
1234 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1235 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); // All arguments are considered to be custom until matched against base plugins arguments
1236 if(value != null) {
1237 // Remove any speech marks appended in strings containing whitespace
1238 if(value.startsWith(SPEECH_CHARACTER) && value.endsWith(SPEECH_CHARACTER)) {
1239 value = value.substring(1, value.length() - 1);
1240 }
1241 if(name.equals(METADATA_ARGUMENT)) {
1242 // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace.
1243 if(value.indexOf(MSMUtils.NS_SEP) == -1) {
1244 value = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + value;
1245 }
1246 }
1247 MSMUtils.setValue(option_element, value);
1248 }
1249 command_element.appendChild(option_element);
1250 option_element = null;
1251 name = null;
1252 value = null;
1253 }
1254
1255 // We must have some RecPlug options: use_metadata_files, and show_progress
1256 if (type.equals(RECPLUG_STR)) {
1257 if (!use_metadata_files) {
1258 Element option_element = document.createElement(OPTION_ELEMENT);
1259 option_element.setAttribute(NAME_ATTRIBUTE, USE_METADATA_FILES_ARGUMENT);
1260 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1261 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
1262 command_element.appendChild(option_element);
1263 option_element = null;
1264 }
1265 Element option_element = document.createElement(OPTION_ELEMENT);
1266 option_element.setAttribute(NAME_ATTRIBUTE, SHOW_PROGRESS_ARGUMENT);
1267 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1268 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
1269 command_element.appendChild(option_element);
1270 option_element = null;
1271 }
1272 type = null;
1273 names = null;
1274 arguments = null;
1275 }
1276 tokenizer = null;
1277 }
1278 catch(Exception exception) {
1279 }
1280 return command_element;
1281 }
1282
1283 private Element parseSearchType(String command_str) {
1284 Element command_element = null;
1285 try {
1286 StringTokenizer tokenizer = new StringTokenizer(command_str);
1287 // First token is command type (levels)
1288 tokenizer.nextToken();
1289 if(tokenizer.hasMoreTokens()) {
1290 command_element = document.createElement(SEARCHTYPE_ELEMENT);
1291 command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1292 while(tokenizer.hasMoreTokens()) {
1293 Element search_element = document.createElement(CONTENT_ELEMENT);
1294 search_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1295 command_element.appendChild(search_element);
1296 search_element = null;
1297 }
1298 }
1299 }
1300 catch(Exception exception) {
1301 }
1302 return command_element;
1303 }
1304
1305 private Element parseSubCollection(String command_str) {
1306 Element command_element = null;
1307 try {
1308 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1309 if(tokenizer.countTokens() >= 3) {
1310 command_element = document.createElement(SUBCOLLECTION_ELEMENT);
1311 // First token is command type
1312 tokenizer.nextToken();
1313 // Then subcollection identifier
1314 command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1315 // Then finally the pattern used to build the subcollection partition
1316 String full_pattern_str = tokenizer.nextToken();
1317 // To make life easier I'm going to parse this up now.
1318 boolean exclusion = (full_pattern_str.substring(1, 2).equals(EXCLAIMATION_CHARACTER));
1319 // Set inclusion/exclusion flag, remove any exclaimation mark and the speech marks
1320 if(exclusion) {
1321 full_pattern_str = full_pattern_str.substring(2, full_pattern_str.length() - 1);
1322 command_element.setAttribute(TYPE_ATTRIBUTE, EXCLUDE_STR);
1323 }
1324 else {
1325 full_pattern_str = full_pattern_str.substring(1, full_pattern_str.length() - 1);
1326 command_element.setAttribute(TYPE_ATTRIBUTE, INCLUDE_STR);
1327 }
1328 StringTokenizer pattern_tokenizer = new StringTokenizer(full_pattern_str, SEPARATOR_CHARACTER);
1329 if(pattern_tokenizer.countTokens() >= 2) {
1330 String content_str = pattern_tokenizer.nextToken();
1331 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1332 if(!content_str.equals(StaticStrings.FILENAME_STR) && content_str.indexOf(MSMUtils.NS_SEP) == -1) {
1333 content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str;
1334 }
1335 command_element.setAttribute(CONTENT_ATTRIBUTE, content_str);
1336 MSMUtils.setValue(command_element, pattern_tokenizer.nextToken());
1337 if(pattern_tokenizer.hasMoreTokens()) {
1338 command_element.setAttribute(OPTIONS_ATTRIBUTE, pattern_tokenizer.nextToken());
1339 }
1340 }
1341 pattern_tokenizer = null;
1342 }
1343 }
1344 catch(Exception exception) {
1345 exception.printStackTrace();
1346 }
1347 return command_element;
1348 }
1349
1350 private Element parseSubCollectionDefaultIndex(String command_str) {
1351 Element command_element = null;
1352 try {
1353 StringTokenizer tokenizer = new StringTokenizer(command_str);
1354 if(tokenizer.countTokens() == 2) {
1355 command_element = document.createElement(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT);
1356 tokenizer.nextToken();
1357 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1358 String content_str = tokenizer.nextToken();
1359 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1360 while(content_tokenizer.hasMoreTokens()) {
1361 Element content_element = document.createElement(CONTENT_ELEMENT);
1362 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1363 command_element.appendChild(content_element);
1364 content_element = null;
1365 }
1366 content_tokenizer = null;
1367 content_str = null;
1368 }
1369 tokenizer = null;
1370 }
1371 catch(Exception exception) {
1372 }
1373 return command_element;
1374 }
1375
1376 private Element parseSubCollectionIndex(String command_str) {
1377 Element command_element = null;
1378 try {
1379 StringTokenizer tokenizer = new StringTokenizer(command_str);
1380 tokenizer.nextToken();
1381 if(tokenizer.hasMoreTokens()) {
1382 command_element = document.createElement(SUBCOLLECTION_INDEXES_ELEMENT);
1383 }
1384 while(tokenizer.hasMoreTokens()) {
1385 Element subcollectionindex_element = document.createElement(INDEX_ELEMENT);
1386 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1387 String content_str = tokenizer.nextToken();
1388 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1389 while(content_tokenizer.hasMoreTokens()) {
1390 Element content_element = document.createElement(CONTENT_ELEMENT);
1391 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1392 subcollectionindex_element.appendChild(content_element);
1393 content_element = null;
1394 }
1395 content_tokenizer = null;
1396 content_str = null;
1397 command_element.appendChild(subcollectionindex_element);
1398 subcollectionindex_element = null;
1399 }
1400 tokenizer = null;
1401 }
1402 catch (Exception exception) {
1403 }
1404 return command_element;
1405 }
1406
1407 private Element parseSuperCollection(String command_str) {
1408 Element command_element = null;
1409 try {
1410 StringTokenizer tokenizer = new StringTokenizer(command_str);
1411 if(tokenizer.countTokens() >= 3) {
1412 command_element = document.createElement(SUPERCOLLECTION_ELEMENT);
1413 tokenizer.nextToken();
1414 while(tokenizer.hasMoreTokens()) {
1415 Element collection_element = document.createElement(COLLECTION_ELEMENT);
1416 collection_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1417 command_element.appendChild(collection_element);
1418 collection_element = null;
1419 }
1420 }
1421 tokenizer = null;
1422 }
1423 catch(Exception exception) {
1424 }
1425 return command_element;
1426 }
1427
1428 private String pluginToString(Element command_element, boolean show_extracted_namespace) {
1429 StringBuffer text = new StringBuffer();
1430 if(!command_element.getAttribute(SEPARATOR_ATTRIBUTE).equals(TRUE_STR)) {
1431 text.append(PLUGIN_STR);
1432 text.append(TAB_CHARACTER);
1433 text.append(TAB_CHARACTER);
1434 text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
1435 // Retrieve, and output, the arguments
1436 NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
1437 int option_elements_length = option_elements.getLength();
1438 if(option_elements_length > 0) {
1439 text.append(SPACE_CHARACTER);
1440 for(int j = 0; j < option_elements_length; j++) {
1441 Element option_element = (Element) option_elements.item(j);
1442 if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1443 text.append(StaticStrings.MINUS_CHARACTER);
1444 text.append(option_element.getAttribute(NAME_ATTRIBUTE));
1445 String value_str = MSMUtils.getValue(option_element);
1446 if(!show_extracted_namespace && value_str.startsWith(EXTRACTED_PREFIX)) {
1447 value_str = value_str.substring(EXTRACTED_PREFIX.length());
1448 }
1449 if(value_str.length() > 0) {
1450 text.append(SPACE_CHARACTER);
1451 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
1452 text.append(value_str);
1453 }
1454 else {
1455 text.append(SPEECH_CHARACTER);
1456 text.append(value_str);
1457 text.append(SPEECH_CHARACTER);
1458 }
1459 }
1460 value_str = null;
1461 if(j < option_elements_length - 1) {
1462 text.append(SPACE_CHARACTER);
1463 }
1464 }
1465 option_element = null;
1466 }
1467 }
1468 option_elements = null;
1469 }
1470 return text.toString();
1471 }
1472
1473 private String searchtypeToString(Element command_element) {
1474 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1475 StringBuffer text = new StringBuffer(SEARCHTYPE_STR);
1476 text.append(TAB_CHARACTER);
1477 NodeList search_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1478 int search_elements_length = search_elements.getLength();
1479 for(int i = 0; i < search_elements_length; i++) {
1480 Element search_element = (Element) search_elements.item(i);
1481 text.append(search_element.getAttribute(NAME_ATTRIBUTE));
1482 text.append(SPACE_CHARACTER);
1483 }
1484 return text.substring(0, text.length() - 1);
1485 }
1486 else {
1487 return null;
1488 }
1489 }
1490
1491 private String subcollectionToString(Element command_element, boolean show_extracted_namespace) {
1492 StringBuffer text = new StringBuffer(SUBCOLLECTION_STR);
1493 text.append(SPACE_CHARACTER);
1494 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
1495 text.append(SPACE_CHARACTER);
1496 text.append(TAB_CHARACTER);
1497 text.append(SPEECH_CHARACTER);
1498 if(command_element.getAttribute(TYPE_ATTRIBUTE).equals(EXCLUDE_STR)) {
1499 text.append(EXCLAIMATION_CHARACTER);
1500 }
1501 String content_str = command_element.getAttribute(CONTENT_ATTRIBUTE);
1502 if(!show_extracted_namespace && content_str.startsWith(EXTRACTED_PREFIX)) {
1503 content_str = content_str.substring(EXTRACTED_PREFIX.length());
1504 }
1505 text.append(content_str);
1506 content_str = null;
1507 text.append(SEPARATOR_CHARACTER);
1508 text.append(MSMUtils.getValue(command_element));
1509 String options_str = command_element.getAttribute(OPTIONS_ATTRIBUTE);
1510 if(options_str.length() > 0) {
1511 text.append(SEPARATOR_CHARACTER);
1512 text.append(options_str);
1513 }
1514 options_str = null;
1515 text.append(SPEECH_CHARACTER);
1516 return text.toString();
1517 }
1518
1519 private String subcollectionDefaultIndexToString(Element command_element) {
1520 StringBuffer text = new StringBuffer(SUBCOLLECTION_DEFAULT_INDEX_STR);
1521 text.append(TAB_CHARACTER);
1522 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1523 int content_elements_length = content_elements.getLength();
1524 for(int j = 0; j < content_elements_length; j++) {
1525 Element content_element = (Element) content_elements.item(j);
1526 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1527 if(j < content_elements_length - 1) {
1528 text.append(StaticStrings.COMMA_CHARACTER);
1529 }
1530 }
1531 return text.toString();
1532 }
1533
1534 private String subcollectionIndexesToString(Element command_element) {
1535 StringBuffer text = new StringBuffer(SUBCOLLECTION_INDEX_STR);
1536 text.append(TAB_CHARACTER);
1537 // Retrieve all of the subcollection index partitions
1538 NodeList subcollectionindex_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
1539 int subcollectionindex_elements_length = subcollectionindex_elements.getLength();
1540 if(subcollectionindex_elements_length == 0) {
1541 return null;
1542 }
1543 for(int j = 0; j < subcollectionindex_elements_length; j++) {
1544 Element subcollectionindex_element = (Element) subcollectionindex_elements.item(j);
1545 NodeList content_elements = subcollectionindex_element.getElementsByTagName(CONTENT_ELEMENT);
1546 int content_elements_length = content_elements.getLength();
1547 for(int k = 0; k < content_elements_length; k++) {
1548 Element content_element = (Element) content_elements.item(k);
1549 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1550 if(k < content_elements_length - 1) {
1551 text.append(StaticStrings.COMMA_CHARACTER);
1552 }
1553 }
1554 if(j < subcollectionindex_elements_length - 1) {
1555 text.append(SPACE_CHARACTER);
1556 }
1557 }
1558 return text.toString();
1559 }
1560
1561 private String supercollectionToString(Element command_element) {
1562 NodeList content_elements = command_element.getElementsByTagName(COLLECTION_ELEMENT);
1563 int content_elements_length = content_elements.getLength();
1564 if(content_elements_length > 1) {
1565 StringBuffer text = new StringBuffer(SUPERCOLLECTION_STR);
1566 text.append(TAB_CHARACTER);
1567 for(int j = 0; j < content_elements_length; j++) {
1568 Element content_element = (Element) content_elements.item(j);
1569 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1570 if(j < content_elements_length - 1) {
1571 text.append(SPACE_CHARACTER);
1572 }
1573 }
1574 return text.toString();
1575 }
1576 return null;
1577 }
1578
1579 private String unknownToString(Element command_element) {
1580 return MSMUtils.getValue(command_element);
1581 }
1582
1583 /** Write the text to the buffer. This is used so we don't have to worry about storing intermediate String values just so we can calaulate length and offset.
1584 * @param writer the BufferedWriter to which the str will be written
1585 * @param str the String to be written
1586 */
1587 private void write(BufferedWriter writer, String str)
1588 throws IOException {
1589 writer.write(str, 0, str.length());
1590 }
1591}
Note: See TracBrowser for help on using the repository browser.