source: trunk/gli/src/org/greenstone/gatherer/cdm/CollectionConfiguration.java@ 5208

Last change on this file since 5208 was 5208, checked in by jmt12, 21 years ago

Fix 203B150

  • Property svn:keywords set to Author Date Id Revision
File size: 65.2 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.cdm;
28import java.awt.*;
29import java.awt.event.*;
30import java.io.*;
31import java.util.*;
32import javax.swing.*;
33import org.greenstone.gatherer.Gatherer;
34import org.greenstone.gatherer.cdm.CommandTokenizer;
35import org.greenstone.gatherer.msm.MSMUtils;
36import org.greenstone.gatherer.util.DOMTree;
37import org.greenstone.gatherer.util.StaticStrings;
38import org.greenstone.gatherer.util.Utility;
39import org.w3c.dom.*;
40/** This class provides either access to a pseudo-G3 document, or parses a collect.cfg file in such a way as to provide an xml-type view of its content. This latter version is useful as it allows the manipulation and free form editing of a legacy collect.cfg file while still allowing the various CDM data managers to base themselves directly on this model (whereas they used to be independent ListModels which clobbered the ordering of unparsed commands).
41 * @author John Thompson, Greenstone Digital Library, University of Waikato
42 * @version 2.3d
43 */
44public class CollectionConfiguration
45 extends StaticStrings {
46
47 static public Document document;
48
49 static public void main(String[] args) {
50 if(args.length >= 1) {
51 File file = new File(args[0]);
52 CollectionConfiguration collect_cfg = new CollectionConfiguration(file);
53 collect_cfg.save(true);
54 collect_cfg.save(false);
55 collect_cfg = null;
56 }
57 else {
58 System.out.println("Usage: CollectionConfiguration <filename>");
59 }
60 }
61
62 /** Find the best insertion position for the given DOM Element. This should try to match command tag, and if found should then try to group by name or type (eg CollectionMeta), or append to end is no such grouping exists (eg PlugIns). Failing a command match it will check against the command order for the best insertion location.
63 * @param element the command Element to be inserted
64 * @return the Element which the given command should be inserted before, or null to append to end of list
65 */
66 static public Node findInsertionPoint(Element target_element) {
67 ///ystem.err.println("Find insertion point: " + target_element.getNodeName());
68 String target_element_name = target_element.getNodeName();
69 Element document_element = document.getDocumentElement();
70 // Try to find commands with the same tag.
71 NodeList matching_elements = document_element.getElementsByTagName(target_element_name);
72 // If we found matching elements, then we have our most likely insertion location, so check within for groupings
73 if(matching_elements.getLength() != 0) {
74 ///ystem.err.println("Found matching elements.");
75 // Only CollectionMeta are grouped.
76 if(target_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
77 ///ystem.err.println("Dealing with collection metadata");
78 // Special case: CollectionMeta can be added at either the start or end of a collection configuration file. However the start position is reserved for special metadata, so if no non-special metadata can be found we must append to the end.
79 // So if the command to be added is special add it immediately after any other special command
80 if(target_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
81 int index = 0;
82 Element matched_element = (Element) matching_elements.item(index);
83 Element sibling_element = (Element) matched_element.getNextSibling();
84 while(sibling_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
85 index++;
86 matched_element = (Element) matching_elements.item(index);
87 sibling_element = (Element) matched_element.getNextSibling();
88 }
89 if(sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
90 Element newline_element = document.createElement(NEWLINE_ELEMENT);
91 document_element.insertBefore(newline_element, sibling_element);
92 }
93 return sibling_element;
94 }
95 // Otherwise try to find a matching 'name' and add after the last one in that group.
96 else {
97 int index = 0;
98 target_element_name = target_element.getAttribute(NAME_ATTRIBUTE);
99 boolean found = false;
100 // Skip all of the special metadata
101 Element matched_element = (Element) matching_elements.item(index);
102 while(matched_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
103 index++;
104 matched_element = (Element) matching_elements.item(index);
105 }
106 // Begin search
107 while(!found && matched_element != null) {
108 if(matched_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
109 found = true;
110 }
111 else {
112 index++;
113 matched_element = (Element) matching_elements.item(index);
114 }
115 }
116 // If we found a match, we need to continue checking until we find the last name match.
117 if(found) {
118 index++;
119 Element previous_sibling = matched_element;
120 Element sibling_element = (Element) matching_elements.item(index);
121 while(sibling_element != null && sibling_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
122 previous_sibling = sibling_element;
123 index++;
124 sibling_element = (Element) matching_elements.item(index);
125 }
126 // Previous sibling now holds the command immediately before where we want to add, so find its next sibling and add to that. In this one case we can ignore new lines!
127 return previous_sibling.getNextSibling();
128 }
129 // If not found we just add after last metadata element
130 else {
131 Element last_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
132 return last_element.getNextSibling();
133 }
134 }
135
136 }
137 else {
138 ///ystem.err.println("Not dealing with collection meta.");
139 Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
140 // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
141 Node sibling_element = matched_element.getNextSibling();
142 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
143 Element newline_element = document.createElement(NEWLINE_ELEMENT);
144 document_element.insertBefore(newline_element, sibling_element);
145 }
146 return sibling_element; // Note that this may be null
147 }
148 }
149 ///ystem.err.println("No matching elements found.");
150 // Locate where this command is in the ordering
151 int command_index = -1;
152 for(int i = 0; command_index == -1 && i < COMMAND_ORDER.length; i++) {
153 if(COMMAND_ORDER[i].equals(target_element_name)) {
154 command_index = i;
155 }
156 }
157 ///ystem.err.println("Command index is: " + command_index);
158 // Now move forward, checking for existing elements in each of the preceeding command orders.
159 int preceeding_index = command_index - 1;
160 ///ystem.err.println("Searching before the target command.");
161 while(preceeding_index >= 0) {
162 matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[preceeding_index]);
163 // If we've found a match
164 if(matching_elements.getLength() > 0) {
165 // We add after the last element
166 Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
167 // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
168 Node sibling_element = matched_element.getNextSibling();
169 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
170 Element newline_element = document.createElement(NEWLINE_ELEMENT);
171 document_element.insertBefore(newline_element, sibling_element);
172 }
173 return sibling_element; // Note that this may be null
174 }
175 preceeding_index--;
176 }
177 // If all that fails, we now move backwards through the commands
178 int susceeding_index = command_index + 1;
179 ///ystem.err.println("Searching after the target command.");
180 while(susceeding_index < COMMAND_ORDER.length) {
181 matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[susceeding_index]);
182 // If we've found a match
183 if(matching_elements.getLength() > 0) {
184 // We add before the first element
185 Element matched_element = (Element) matching_elements.item(0);
186 // One final quick test. If the matched element is immediately preceeded by a NewLine command, then we insert another NewLine before the matched command, then return this new NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
187 Node sibling_element = matched_element.getPreviousSibling();
188 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
189 Element newline_element = document.createElement(NEWLINE_ELEMENT);
190 document_element.insertBefore(newline_element, sibling_element);
191 }
192 return sibling_element; // Note that this may be null
193 }
194 susceeding_index++;
195 }
196 // Well. Apparently there are no other commands in this collection configuration. So append away...
197 return null;
198 }
199
200 static public String toString(Element command_element, boolean show_extracted_namespace) {
201 String command_element_name = command_element.getNodeName();
202 if(command_element_name.equals(CLASSIFY_ELEMENT)) {
203 return self.classifyToString(command_element, show_extracted_namespace);
204 }
205 else if(command_element_name.equals(FORMAT_ELEMENT)) {
206 return self.formatToString(command_element, show_extracted_namespace);
207 }
208 else if(command_element_name.equals(INDEXES_ELEMENT)) {
209 return self.indexesToString(command_element, show_extracted_namespace);
210 }
211 else if(command_element_name.equals(INDEX_DEFAULT_ELEMENT)) {
212 return self.indexDefaultToString(command_element, show_extracted_namespace);
213 }
214 else if(command_element_name.equals(LANGUAGES_ELEMENT)) {
215 return self.languagesToString(command_element);
216 }
217 else if(command_element_name.equals(LANGUAGE_DEFAULT_ELEMENT)) {
218 return self.languageDefaultToString(command_element);
219 }
220 else if(command_element_name.equals(LEVELS_ELEMENT)) {
221 return self.levelsToString(command_element);
222 }
223 else if(command_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
224 return self.metadataToString(command_element);
225 }
226 else if(command_element_name.equals(COLLECTIONMETADATA_CREATOR_ELEMENT)) {
227 return self.metadataToString(command_element);
228 }
229 else if(command_element_name.equals(COLLECTIONMETADATA_MAINTAINER_ELEMENT)) {
230 return self.metadataToString(command_element);
231 }
232 else if(command_element_name.equals(COLLECTIONMETADATA_PUBLIC_ELEMENT)) {
233 return self.metadataToString(command_element);
234 }
235 else if(command_element_name.equals(COLLECTIONMETADATA_BETA_ELEMENT)) {
236 return self.metadataToString(command_element);
237 }
238 else if(command_element_name.equals(PLUGIN_ELEMENT)) {
239 return self.pluginToString(command_element, show_extracted_namespace);
240 }
241 else if(command_element_name.equals(SEARCHTYPE_ELEMENT)) {
242 return self.searchtypeToString(command_element);
243 }
244 else if(command_element_name.equals(SUBCOLLECTION_ELEMENT)) {
245 return self.subcollectionToString(command_element, show_extracted_namespace);
246 }
247 else if(command_element_name.equals(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) {
248 return self.subcollectionDefaultIndexToString(command_element);
249 }
250 else if(command_element_name.equals(SUBCOLLECTION_INDEXES_ELEMENT)) {
251 return self.subcollectionIndexesToString(command_element);
252 }
253 else if(command_element_name.equals(SUPERCOLLECTION_ELEMENT)) {
254 return self.supercollectionToString(command_element);
255 }
256 else if(command_element_name.equals(UNKNOWN_ELEMENT)) {
257 return self.unknownToString(command_element);
258 }
259 return "";
260 }
261
262 /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix.
263 * @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string
264 * @return a HashMap containing the arguments parsed
265 */
266 static public HashMap parseArguments(CommandTokenizer tokenizer) {
267 HashMap arguments = new HashMap();
268 String name = null;
269 String value = null;
270 while(tokenizer.hasMoreTokens() || name != null) {
271 // First we retrieve a name if we need one.
272 if(name == null) {
273 name = tokenizer.nextToken();
274 }
275 // Now we attempt to retrieve a value
276 if(tokenizer.hasMoreTokens()) {
277 value = tokenizer.nextToken();
278 // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop.
279 if(value.startsWith(StaticStrings.MINUS_CHARACTER)) {
280 arguments.put(name, null);
281 name = value;
282 }
283 // Otherwise we have a typical name->value pair ready to go
284 else {
285 arguments.put(name, value);
286 name = null;
287 }
288 }
289 // Otherwise its a binary flag
290 else {
291 arguments.put(name, null);
292 name = null;
293 }
294 }
295 return arguments;
296 }
297
298 static private ArrayList known_metadata;
299
300 static private CollectionConfiguration self;
301
302 static final private String EXTRACTED_PREFIX = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP;
303 /** Gives the preferred ordering of commands */
304 static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, StaticStrings.COLLECTIONMETADATA_BETA_ELEMENT, StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};
305
306 /** ************************** Public Data Members ***************************/
307
308 /** ************************** Private Data Members ***************************/
309
310 private File collect_config_file;
311
312 /** ************************** Public Methods ***************************/
313
314 public CollectionConfiguration(File collect_config_file) {
315 this.self = this;
316 this.collect_config_file = collect_config_file;
317 // If collect_cfg is xml we can load it straight away
318 String collect_config_name = collect_config_file.getName();
319 if(collect_config_name.equals(COLLECTCONFIGURATION_XML)) {
320 // Parse with Utility but don't use class loader
321 document = Utility.parse(collect_config_file, false);
322 }
323 // Otherwise if this is a legacy collect.cfg file then read in the template and send to magic parser
324 else if(collect_config_name.equals(COLLECT_CFG)) {
325 document = Utility.parse(PSEUDO_COLLECTCONFIGURATION_XML, true);
326 parse(collect_config_file);
327 }
328 }
329
330 /** This debug facility shows the currently loaded collect.cfg or CollectConfig.xml file as a DOM tree. */
331 public void display() {
332 JDialog dialog = new JDialog(Gatherer.g_man, "Collection Configuration", false);
333 dialog.setSize(400,400);
334 JPanel content_pane = (JPanel) dialog.getContentPane();
335 final DOMTree tree = new DOMTree(document);
336 JButton refresh_button = new JButton("Refresh Tree");
337 refresh_button.addActionListener(new ActionListener() {
338 public void actionPerformed(ActionEvent event) {
339 tree.setDocument(document);
340 }
341 });
342 content_pane.setBorder(BorderFactory.createEmptyBorder(5,5,5,5));
343 content_pane.setLayout(new BorderLayout());
344 content_pane.add(new JScrollPane(tree), BorderLayout.CENTER);
345 content_pane.add(refresh_button, BorderLayout.SOUTH);
346 dialog.show();
347 }
348
349 public Element getBeta() {
350 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_BETA_ELEMENT, null, null);
351 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_BETA_STR);
352 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
353 return element;
354 }
355
356 public Element getCreator() {
357 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_CREATOR_ELEMENT, null, null);
358 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_CREATOR_STR);
359 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
360 return element;
361 }
362
363 public Element getDocumentElement() {
364 return document.getDocumentElement();
365 }
366
367 public File getFile() {
368 return collect_config_file;
369 }
370
371 /** Retrieve or create the languages Element. */
372 public Element getLanguages() {
373 return getOrCreateElementByTagName(LANGUAGES_ELEMENT, null, null);
374 }
375
376 public Element getLevels() {
377 return getOrCreateElementByTagName(LEVELS_ELEMENT, null, null);
378 }
379
380 public Element getMaintainer() {
381 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_MAINTAINER_ELEMENT, null, null);
382 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_MAINTAINER_STR);
383 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
384 return element;
385 }
386
387 /** Retrieve or create the indexes Element. Note that this method behaves differently from the other getBlah methods, in that it also has to keep in mind that indexes come in two flavours, MG and MGPP. */
388 public Element getMGIndexes() {
389 return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, FALSE_STR);
390 }
391
392 public Element getMGPPIndexes() {
393 return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, TRUE_STR);
394 }
395
396 public Element getPublic() {
397 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_PUBLIC_ELEMENT, null, null);
398 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_PUBLIC_STR);
399 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
400 return element;
401 }
402
403 /** Retrieve or create the searchtype element. */
404 public Element getSearchType() {
405 ///ystem.err.println("Get or create element by tag name: " + name);
406 Element document_element = document.getDocumentElement();
407 NodeList elements = document_element.getElementsByTagName(SEARCHTYPE_ELEMENT);
408 int elements_length = elements.getLength();
409 if(elements_length > 0) {
410 document_element = null;
411 return (Element) elements.item(0);
412 }
413 // Create the element
414 Element element = document.createElement(SEARCHTYPE_ELEMENT);
415 Node target_node = findInsertionPoint(element);
416 if(target_node != null) {
417 document_element.insertBefore(element, target_node);
418 }
419 else {
420 document_element.appendChild(element);
421 }
422 document_element = null;
423 // Append a default search type node - form
424 Element a_searchtype_element = CollectionDesignManager.collect_config.document.createElement(CollectionConfiguration.CONTENT_ELEMENT);
425 a_searchtype_element.setAttribute(CollectionConfiguration.NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]);
426 element.appendChild(a_searchtype_element);
427 return element;
428 }
429
430 /** Retrieve or create the subindexes Element. */
431 public Element getSubIndexes() {
432 return getOrCreateElementByTagName(SUBCOLLECTION_INDEXES_ELEMENT, null, null);
433 }
434
435 /** Retrieve or create the supercollections Element. */
436 public Element getSuperCollection() {
437 return getOrCreateElementByTagName(SUPERCOLLECTION_ELEMENT, null, null);
438 }
439
440 public void save() {
441 save(false);
442 }
443
444 public void save(boolean force_xml) {
445 if(collect_config_file.exists()) {
446 File original_file = new File(collect_config_file.getParentFile(), COLLECT_CFG);
447 File backup_file = new File(collect_config_file.getParentFile(), "collect.bak");
448 if(backup_file.exists()) {
449 backup_file.delete();
450 }
451 if(!original_file.renameTo(backup_file)) {
452 Gatherer.println("Can't rename collect.cfg");
453 }
454 }
455 if(force_xml || collect_config_file.getName().equals(COLLECTCONFIGURATION_XML)) {
456 ///ystem.err.println("Writing XML");
457 Utility.export(document, new File(collect_config_file.getParentFile(), COLLECTCONFIGURATION_XML));
458 }
459 else {
460 ///ystem.err.println("Writing text");
461 try {
462 FileWriter file_writer = new FileWriter(collect_config_file, false);
463 BufferedWriter buffered_writer = new BufferedWriter(file_writer);
464 // In order to write out an old style collect.cfg we have to traverse the model and do several 'cute' tricks to ensure the collect.cfg is valid (for instance while every metadata element has a language attribute, only second or subsequent metadata, for a certain name, needs a language argument - hence the known metadata array. Note that within GLI the language will always be shown, but it doesn't crash and burn like G2 does, te-he).
465 known_metadata = new ArrayList();
466 Element collect_config_element = document.getDocumentElement();
467 NodeList command_elements = collect_config_element.getChildNodes();
468 boolean just_wrote_newline = false; // Prevent two or more newlines in a row
469 for(int i = 0; i < command_elements.getLength(); i++) {
470 Node command_node = command_elements.item(i);
471 if(command_node instanceof Element) {
472 Element command_element = (Element) command_node;
473 // The only thing left are NewLine elements
474 if(command_element.getNodeName().equals(NEWLINE_ELEMENT) && !just_wrote_newline) {
475 buffered_writer.newLine();
476 just_wrote_newline = true;
477 }
478 // Anything else we write to file, but only if it has been assigned, the exception being the Indexes element which just get commented if unassigned (a side effect of MG && MGPP compatibility)
479 else if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR) || command_element.getNodeName().equals(INDEXES_ELEMENT) || command_element.getNodeName().equals(INDEX_DEFAULT_ELEMENT) || command_element.getNodeName().equals(LEVELS_ELEMENT)){
480 String command = toString(command_element, false);
481 if(command != null && command.length() > 0) {
482 write(buffered_writer, command);
483 buffered_writer.newLine();
484 just_wrote_newline = false;
485 }
486 }
487 }
488 }
489 buffered_writer.close();
490 known_metadata = null;
491 }
492 catch (Exception exception) {
493 Gatherer.println("Error in CollectionConfiguration.save(boolean): " + exception);
494 Gatherer.printStackTrace(exception);
495 }
496 }
497 }
498
499 /** ************************** Private Methods ***************************/
500
501 private String classifyToString(Element command_element, boolean show_extracted_namespace) {
502 StringBuffer text = new StringBuffer(CLASSIFY_STR);
503 text.append(TAB_CHARACTER);
504 text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
505 text.append(SPACE_CHARACTER);
506 NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
507 int option_elements_length = option_elements.getLength();
508 for(int j = 0; j < option_elements_length; j++) {
509 Element option_element = (Element) option_elements.item(j);
510 if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
511 text.append(StaticStrings.MINUS_CHARACTER);
512 text.append(option_element.getAttribute(NAME_ATTRIBUTE));
513 String value_str = MSMUtils.getValue(option_element);
514 if(value_str.length() > 0) {
515 // If the value happens to be the identifier of an extracted metadata element, then remove the namespace.
516 if(!show_extracted_namespace && value_str.startsWith(EXTRACTED_PREFIX)) {
517 value_str = value_str.substring(EXTRACTED_PREFIX.length());
518 }
519 text.append(SPACE_CHARACTER);
520 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
521 text.append(value_str);
522 }
523 else {
524 text.append(QUOTE_CHARACTER);
525 text.append(value_str);
526 text.append(QUOTE_CHARACTER);
527 }
528 }
529 value_str = null;
530 if(j < option_elements_length - 1) {
531 text.append(SPACE_CHARACTER);
532 }
533 }
534 option_element = null;
535 }
536 option_elements = null;
537 return text.toString();
538 }
539
540 private String formatToString(Element command_element, boolean show_extracted_namespace) {
541 StringBuffer text = new StringBuffer(FORMAT_STR);
542 text.append(SPACE_CHARACTER);
543 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
544 text.append(SPACE_CHARACTER);
545 String value_str = command_element.getAttribute(VALUE_ATTRIBUTE);
546 if(value_str.length() != 0) {
547 text.append(value_str);
548 }
549 else {
550 value_str = MSMUtils.getValue(command_element);
551 // Remove any references to a namespace for extracted metadata
552 if(!show_extracted_namespace) {
553 value_str.replaceAll(EXTRACTED_PREFIX, "");
554 }
555 text.append(QUOTE_CHARACTER);
556 text.append(value_str);
557 text.append(QUOTE_CHARACTER);
558 }
559 value_str = null;
560 return text.toString();
561 }
562
563 /** Retrieve or create the indexes Element. */
564 private Element getOrCreateElementByTagName(String name, String conditional_attribute, String required_value) {
565 ///ystem.err.println("Get or create element by tag name: " + name);
566 Element document_element = document.getDocumentElement();
567 NodeList elements = document_element.getElementsByTagName(name);
568 int elements_length = elements.getLength();
569 if(elements_length > 0) {
570 if(conditional_attribute == null) {
571 document_element = null;
572 return (Element) elements.item(0);
573 }
574 else {
575 for(int i = 0; i < elements_length; i++) {
576 Element element = (Element) elements.item(i);
577 if(element.getAttribute(conditional_attribute).equals(required_value)) {
578 document_element = null;
579 return element;
580 }
581 element = null;
582 }
583 }
584 }
585 // Create the element
586 Element element = document.createElement(name);
587 // If there was a property set it
588 if(conditional_attribute != null) {
589 element.setAttribute(conditional_attribute, required_value);
590 }
591 Node target_node = findInsertionPoint(element);
592 if(target_node != null) {
593 document_element.insertBefore(element, target_node);
594 }
595 else {
596 document_element.appendChild(element);
597 }
598 document_element = null;
599 return element;
600 }
601
602 private String indexesToString(Element command_element, boolean show_extracted_namespace) {
603 boolean comment_only = false;
604 StringBuffer text = new StringBuffer("");
605 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
606 text.append("#");
607 comment_only = true;
608 }
609 text.append(INDEX_STR);
610 text.append(TAB_CHARACTER);
611 if(!comment_only) {
612 text.append(TAB_CHARACTER);
613 }
614 NodeList index_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
615 // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
616 int index_elements_length = index_elements.getLength();
617 for(int j = 0; j < index_elements_length; j++) {
618 Element index_element = (Element) index_elements.item(j);
619 String level_str = index_element.getAttribute(LEVEL_ATTRIBUTE);
620 if(level_str.length() > 0) {
621 text.append(level_str);
622 text.append(StaticStrings.COLON_CHARACTER);
623 }
624 NodeList content_elements = index_element.getElementsByTagName(CONTENT_ELEMENT);
625 int content_elements_length = content_elements.getLength();
626 // Don't output anything if no indexes are set
627 if(content_elements_length == 0) {
628 return null;
629 }
630 for(int k = 0; k < content_elements_length; k++) {
631 Element content_element = (Element) content_elements.item(k);
632 String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
633 if(!show_extracted_namespace && name_str.startsWith(EXTRACTED_PREFIX)) {
634 name_str = name_str.substring(EXTRACTED_PREFIX.length());
635 }
636 text.append(name_str);
637 name_str = null;
638 if(k < content_elements_length - 1) {
639 text.append(StaticStrings.COMMA_CHARACTER);
640 }
641 content_element = null;
642 }
643 if(j < index_elements_length - 1) {
644 text.append(SPACE_CHARACTER);
645 }
646 content_elements = null;
647 index_element = null;
648 }
649 index_elements = null;
650 return text.toString();
651 }
652
653 private String indexDefaultToString(Element command_element, boolean show_extracted_namespace) {
654 StringBuffer text = new StringBuffer("");
655 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
656 text.append("#");
657 }
658 text.append(INDEX_DEFAULT_STR);
659 text.append(TAB_CHARACTER);
660 text.append(command_element.getAttribute(LEVEL_ATTRIBUTE));
661 text.append(StaticStrings.COLON_CHARACTER);
662 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
663 int content_elements_length = content_elements.getLength();
664 for(int j = 0; j < content_elements_length; j++) {
665 Element content_element = (Element) content_elements.item(j);
666 String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
667 if(!show_extracted_namespace && name_str.startsWith(EXTRACTED_PREFIX)) {
668 name_str = name_str.substring(EXTRACTED_PREFIX.length());
669 }
670 text.append(name_str);
671 name_str = null;
672 if(j < content_elements_length - 1) {
673 text.append(StaticStrings.COMMA_CHARACTER);
674 }
675 content_element = null;
676 }
677 content_elements = null;
678 return text.toString();
679 }
680
681 private String languagesToString(Element command_element) {
682 StringBuffer text = new StringBuffer(LANGUAGES_STR);
683 text.append(TAB_CHARACTER);
684 // Retrieve all the languages and write them out in a space separated list
685 NodeList language_elements = command_element.getElementsByTagName(LANGUAGE_ELEMENT);
686 int language_elements_length = language_elements.getLength();
687 if(language_elements_length == 0) {
688 return null;
689 }
690 for(int j = 0; j < language_elements_length; j++) {
691 Element language_element = (Element) language_elements.item(j);
692 text.append(language_element.getAttribute(NAME_ATTRIBUTE));
693 if(j < language_elements_length - 1) {
694 text.append(SPACE_CHARACTER);
695 }
696 }
697 return text.toString();
698 }
699
700 private String languageDefaultToString(Element command_element) {
701 StringBuffer text = new StringBuffer(LANGUAGE_DEFAULT_STR);
702 text.append(TAB_CHARACTER);
703 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
704 return text.toString();
705 }
706
707 private String levelsToString(Element command_element) {
708 StringBuffer text = new StringBuffer("");
709 if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
710 text.append("#");
711 }
712 text.append(LEVELS_STR);
713 text.append(TAB_CHARACTER);
714 text.append(TAB_CHARACTER);
715 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
716 int content_elements_length = content_elements.getLength();
717 // Don't output anything if no levels are set.
718 if(content_elements_length == 0) {
719 return null;
720 }
721 for(int i = 0; i < content_elements_length; i++) {
722 Element content_element = (Element) content_elements.item(i);
723 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
724 text.append(SPACE_CHARACTER);
725 }
726 return text.substring(0, text.length() - 1);
727 }
728
729 static public String metadataToString(Element command_element) {
730 // If there is no value attribute, then we don't write anything
731 String value_str = MSMUtils.getValue(command_element);
732 if(value_str == null || value_str.length() == 0) {
733 return "";
734 }
735 else {
736 StringBuffer text = new StringBuffer("");
737 String name_str = command_element.getAttribute(NAME_ATTRIBUTE);
738 // If the name is one of the special four, we don't write the collectionmeta first. Note the maintainer collectionmeta is singled out for 'prittying' reasons.
739 if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
740 text.append(name_str);
741 text.append(TAB_CHARACTER);
742 }
743 else if(name_str.equals(COLLECTIONMETADATA_BETA_STR) || name_str.equals(COLLECTIONMETADATA_CREATOR_STR) || name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
744 text.append(name_str);
745 text.append(TAB_CHARACTER);
746 text.append(TAB_CHARACTER);
747 }
748 else {
749 text.append(COLLECTIONMETADATA_STR);
750 text.append(TAB_CHARACTER);
751 text.append(name_str);
752 text.append(SPACE_CHARACTER);
753 String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE);
754 // If this is element is in english, and it is the first one found, we don't need to write the language argument.
755 if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) {
756 text.append(LBRACKET_CHARACTER);
757 text.append(LANGUAGE_ARGUMENT);
758 text.append(language_str);
759 text.append(RBRACKET_CHARACTER);
760 text.append(SPACE_CHARACTER);
761 }
762 if(known_metadata != null) {
763 known_metadata.add(name_str);
764 }
765 language_str = null;
766 }
767 name_str = null;
768 // We don't wrap the email addresses in quotes, nor any string without spaces
769 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
770 text.append(value_str);
771 }
772 else {
773 text.append(QUOTE_CHARACTER);
774 text.append(value_str);
775 text.append(QUOTE_CHARACTER);
776 }
777 value_str = null;
778 return text.toString();
779 }
780 }
781
782 /** Parse a collect.cfg into a DOM model representation. */
783 private void parse(File collect_config_file) {
784 try {
785 Element collect_cfg_element = document.getDocumentElement();
786 // Read in the file command at a time.
787 FileReader in_reader = new FileReader(collect_config_file);
788 BufferedReader in = new BufferedReader(in_reader);
789 String command_str = null;
790 while((command_str = in.readLine()) != null) {
791 Element command_element = null;
792 // A command may be broken over several lines.
793 command_str = command_str.trim();
794 boolean eof = false;
795 while(!eof && command_str.endsWith(NEWLINE_CHARACTER)) {
796 String next_line = in.readLine();
797 if(next_line != null) {
798 next_line = next_line.trim();
799 if(next_line.length() > 0) {
800 // Remove the new line character
801 command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
802 // And append the next line, which due to the test above must be non-zero length
803 command_str = command_str + next_line;
804 }
805 next_line = null;
806 }
807 // If we've reached the end of the file theres nothing more we can do
808 else {
809 eof = true;
810 }
811 }
812 // If there is still a new line character, then we remove it and hope for the best
813 if(command_str.endsWith(NEWLINE_CHARACTER)) {
814 command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
815 }
816 // Now we've either got a command to parse...
817 if(command_str.length() != 0) {
818 // Start trying to figure out what it is
819 StringTokenizer tokenizer = new StringTokenizer(command_str);
820 String command_type = tokenizer.nextToken().toLowerCase();
821 tokenizer = null;
822 // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
823 if(command_element == null && command_type.equals(CLASSIFY_STR)) {
824 command_element = parseClassify(command_str);
825 }
826 if(command_element == null && command_type.equals(FORMAT_STR)) {
827 command_element = parseFormat(command_str);
828 }
829 if(command_element == null && (command_type.equals(INDEX_STR) || command_type.equals(COMMENTED_INDEXES_STR))) {
830 command_element = parseIndex(command_str);
831 }
832 if(command_element == null && (command_type.equals(INDEX_DEFAULT_STR) || command_type.equals(COMMENTED_INDEX_DEFAULT_STR))) {
833 command_element = parseIndexDefault(command_str);
834 }
835 if(command_element == null && command_type.equals(LANGUAGES_STR)) {
836 command_element = parseLanguage(command_str);
837 }
838 if(command_element == null && command_type.equals(LANGUAGE_DEFAULT_STR)) {
839 command_element = parseLanguageDefault(command_str);
840 }
841 if(command_element == null && (command_type.equals(LEVELS_STR) || command_type.equals(COMMENTED_LEVELS_STR))) {
842 command_element = parseLevels(command_str);
843 }
844 if(command_element == null && command_type.equals(COLLECTIONMETADATA_STR)) {
845 command_element = parseMetadata(command_str);
846 }
847 if(command_element == null && (command_type.equals(COLLECTIONMETADATA_BETA_STR) || command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) {
848 command_element = parseMetadataSpecial(command_str);
849 }
850 if(command_element == null && command_type.equals(PLUGIN_STR)) {
851 command_element = parsePlugIn(command_str);
852 }
853 if(command_element == null && command_type.equals(SEARCHTYPE_STR)) {
854 command_element = parseSearchType(command_str);
855 }
856 if(command_element == null && command_type.equals(SUBCOLLECTION_STR)) {
857 command_element = parseSubCollection(command_str);
858 }
859 if(command_element == null && command_type.equals(SUBCOLLECTION_DEFAULT_INDEX_STR)) {
860 command_element = parseSubCollectionDefaultIndex(command_str);
861 }
862 if(command_element == null && command_type.equals(SUBCOLLECTION_INDEX_STR)) {
863 command_element = parseSubCollectionIndex(command_str);
864 }
865 if(command_element == null && (command_type.equals(SUPERCOLLECTION_STR) || command_type.equals(CCS_STR))) {
866 command_element = parseSuperCollection(command_str);
867 }
868 // Doesn't match any known type
869 command_type = null;
870 if(command_element == null) {
871 // No-one knows what to do with this command, so we create an Unknown command element
872 command_element = document.createElement(UNKNOWN_ELEMENT);
873 MSMUtils.setValue(command_element, command_str);
874 }
875 }
876 // Or an empty line to remember for later
877 else {
878 command_element = document.createElement(NEWLINE_ELEMENT);
879 }
880 // Now command element shouldn't be null so we append it to the collection config DOM
881 collect_cfg_element.appendChild(command_element);
882 }
883 }
884 catch(Exception exception) {
885 Gatherer.println("Error in CollectionConfiguration.parse(java.io.File): " + exception);
886 Gatherer.printStackTrace(exception);
887 }
888 }
889
890 private Element parseClassify(String command_str) {
891 Element command_element = null;
892 try {
893 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
894 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
895 if(tokenizer.countTokens() >= 4) {
896 command_element = document.createElement(CLASSIFY_ELEMENT);
897 // First token is classify
898 tokenizer.nextToken();
899 // The next token is the classifier type
900 command_element.setAttribute(TYPE_ATTRIBUTE, tokenizer.nextToken());
901 // Now we parse out the remaining arguments into a hashmapping from name to value
902 HashMap arguments = parseArguments(tokenizer);
903 // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
904 Iterator names = arguments.keySet().iterator();
905 while(names.hasNext()) {
906 String name = (String) names.next();
907 String value = (String) arguments.get(name); // Can be null
908 // Everything else is an Option Element
909 Element option_element = document.createElement(OPTION_ELEMENT);
910 option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
911 if(value != null) {
912 // Remove any speech marks appended in strings containing whitespace
913 if(value.startsWith(QUOTE_CHARACTER) && value.endsWith(QUOTE_CHARACTER)) {
914 value = value.substring(1, value.length() - 1);
915 }
916 // The metadata argument gets added as the content attribute
917 if(name.equals(METADATA_ARGUMENT) && value != null) {
918 // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace.
919 if(value.indexOf(MSMUtils.NS_SEP) == -1) {
920 value = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + value;
921 }
922 //command_element.setAttribute(CONTENT_ATTRIBUTE, value);
923 }
924 MSMUtils.setValue(option_element, value);
925 }
926 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
927 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
928 command_element.appendChild(option_element);
929 option_element = null;
930 name = null;
931 value = null;
932 }
933 names = null;
934 arguments = null;
935 }
936 tokenizer = null;
937 }
938 catch(Exception error) {
939 }
940 return command_element;
941 }
942
943 private Element parseFormat(String command_str) {
944 Element command_element = null;
945 try {
946 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
947 if(tokenizer.countTokens() >= 3) {
948 command_element = document.createElement(FORMAT_ELEMENT);
949 // First token is format
950 tokenizer.nextToken();
951 command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
952 String format_value = tokenizer.nextToken();
953 // If the value is true or false we add it as an attribute
954 if(format_value.equalsIgnoreCase(TRUE_STR) || format_value.equalsIgnoreCase(FALSE_STR)) {
955 command_element.setAttribute(VALUE_ATTRIBUTE, format_value.toLowerCase());
956 }
957 // Otherwise it gets added as a text node
958 else {
959 // Strip any speech marks
960 if(format_value.startsWith(QUOTE_CHARACTER) && format_value.endsWith(QUOTE_CHARACTER)) {
961 format_value = format_value.substring(1, format_value.length() - 1);
962 }
963 MSMUtils.setValue(command_element, format_value);
964 }
965 format_value = null;
966 }
967 tokenizer = null;
968 }
969 catch(Exception exception) {
970 }
971 return command_element;
972 }
973
974 private Element parseIndex(String command_str) {
975 Element command_element = null;
976 try {
977 StringTokenizer tokenizer = new StringTokenizer(command_str);
978 String command = tokenizer.nextToken();
979 if(tokenizer.hasMoreTokens()) {
980 command_element = document.createElement(INDEXES_ELEMENT);
981 }
982 command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(INDEX_STR) ? TRUE_STR : FALSE_STR));
983 command = null;
984 while(tokenizer.hasMoreTokens()) {
985 Element index_element = document.createElement(INDEX_ELEMENT);
986 String index_str = tokenizer.nextToken();
987 // There are two types of index we have to consider. Old G2.38 and earlier use level:source tuplets while G2.39+ have just a single, non-comma separated list where order is important.
988 boolean old_index;
989 if(index_str.indexOf(COLON_CHARACTER) != -1) {
990 old_index = true;
991 index_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
992 index_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
993 command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR);
994 }
995 else {
996 command_element.setAttribute(MGPP_ATTRIBUTE, TRUE_STR);
997 old_index = false;
998 }
999 StringTokenizer content_tokenizer = new StringTokenizer(index_str, StaticStrings.COMMA_CHARACTER);
1000 while(content_tokenizer.hasMoreTokens()) {
1001 Element content_element = document.createElement(CONTENT_ELEMENT);
1002 String content_str = content_tokenizer.nextToken();
1003 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1004 if(content_str.indexOf(MSMUtils.NS_SEP) == -1) {
1005 if(content_str.equals(StaticStrings.TEXT_STR) || (!old_index && content_str.equals(StaticStrings.ALLFIELDS_STR))) {
1006 // Our special strings are OK.
1007 }
1008 else {
1009 content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str;
1010 }
1011 }
1012 content_element.setAttribute(NAME_ATTRIBUTE, content_str);
1013 index_element.appendChild(content_element);
1014 content_element = null;
1015 }
1016 content_tokenizer = null;
1017 index_str = null;
1018 command_element.appendChild(index_element);
1019 index_element = null;
1020 }
1021 tokenizer = null;
1022 }
1023 catch (Exception exception) {
1024 exception.printStackTrace();
1025 }
1026 return command_element;
1027 }
1028
1029 private Element parseIndexDefault(String command_str) {
1030 Element command_element = null;
1031 try {
1032 StringTokenizer tokenizer = new StringTokenizer(command_str);
1033 if(tokenizer.countTokens() >= 2) {
1034 command_element = document.createElement(INDEX_DEFAULT_ELEMENT);
1035 command_element.setAttribute(ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken()).equals(INDEX_DEFAULT_STR) ? TRUE_STR : FALSE_STR));
1036 String index_str = tokenizer.nextToken();
1037 command_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
1038 String content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
1039 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1040 while(content_tokenizer.hasMoreTokens()) {
1041 Element content_element = document.createElement(CONTENT_ELEMENT);
1042 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1043 command_element.appendChild(content_element);
1044 content_element = null;
1045 }
1046 content_tokenizer = null;
1047 content_str = null;
1048 content_str = null;
1049 index_str = null;
1050 }
1051 tokenizer = null;
1052 }
1053 catch (Exception exception) {
1054 }
1055 return command_element;
1056 }
1057
1058 private Element parseLanguage(String command_str) {
1059 Element command_element = null;
1060 try {
1061 StringTokenizer tokenizer = new StringTokenizer(command_str);
1062 tokenizer.nextToken();
1063 if(tokenizer.hasMoreTokens()) {
1064 command_element = document.createElement(LANGUAGES_ELEMENT);
1065 while(tokenizer.hasMoreTokens()) {
1066 Element language_element = document.createElement(LANGUAGE_ELEMENT);
1067 language_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1068 command_element.appendChild(language_element);
1069 language_element = null;
1070 }
1071 }
1072 tokenizer = null;
1073 }
1074 catch (Exception exception) {
1075 }
1076 return command_element;
1077 }
1078
1079 private Element parseLanguageDefault(String command_str) {
1080 Element command_element = null;
1081 try {
1082 StringTokenizer tokenizer = new StringTokenizer(command_str);
1083 if(tokenizer.countTokens() >= 2) {
1084 command_element = document.createElement(LANGUAGE_DEFAULT_ELEMENT);
1085 tokenizer.nextToken();
1086 String default_language_str = tokenizer.nextToken();
1087 command_element.setAttribute(NAME_ATTRIBUTE, default_language_str);
1088 default_language_str = null;
1089 }
1090 tokenizer = null;
1091 }
1092 catch (Exception exception) {
1093 }
1094 return command_element;
1095 }
1096
1097 private Element parseLevels(String command_str) {
1098 Element command_element = null;
1099 try {
1100 StringTokenizer tokenizer = new StringTokenizer(command_str);
1101 // First token is command type (levels)
1102 String command = tokenizer.nextToken();
1103 if(tokenizer.hasMoreTokens()) {
1104 command_element = document.createElement(LEVELS_ELEMENT);
1105 command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(LEVELS_STR) ? TRUE_STR : FALSE_STR));
1106 while(tokenizer.hasMoreTokens()) {
1107 Element level_element = document.createElement(CONTENT_ELEMENT);
1108 level_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1109 command_element.appendChild(level_element);
1110 level_element = null;
1111 }
1112 }
1113 command = null;
1114 }
1115 catch(Exception exception) {
1116 }
1117 return command_element;
1118 }
1119
1120 private Element parseMetadata(String command_str) {
1121 Element command_element = null;
1122 try {
1123 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1124 if(tokenizer.countTokens() >= 3) {
1125 command_element = document.createElement(COLLECTIONMETADATA_ELEMENT);
1126 // First token is command type
1127 tokenizer.nextToken();
1128 String name_str = tokenizer.nextToken();
1129 String value_str = tokenizer.nextToken();
1130 String language_str = "en"; // By default
1131 // Check if the value string is actually a language string
1132 if(value_str.startsWith(LBRACKET_CHARACTER) && value_str.endsWith(RBRACKET_CHARACTER)) {
1133 language_str = value_str.substring(value_str.indexOf(LANGUAGE_ARGUMENT) + 2, value_str.length() - 1);
1134 value_str = tokenizer.nextToken();
1135 }
1136 // Remove any speech marks from value
1137 if(value_str.startsWith(QUOTE_CHARACTER) && value_str.endsWith(QUOTE_CHARACTER)) {
1138 value_str = value_str.substring(1, value_str.length() - 1);
1139 }
1140 if(value_str != null) {
1141 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1142 command_element.setAttribute(LANGUAGE_ATTRIBUTE, language_str);
1143 MSMUtils.setValue(command_element, Utility.encodeXML(value_str));
1144 }
1145 else {
1146 command_element = null;
1147 }
1148 language_str = null;
1149 value_str = null;
1150 name_str = null;
1151 }
1152 tokenizer = null;
1153 }
1154 catch (Exception exception) {
1155 }
1156 return command_element;
1157 }
1158
1159 private Element parseMetadataSpecial(String command_str) {
1160 Element command_element = null;
1161 try {
1162 StringTokenizer tokenizer = new StringTokenizer(command_str);
1163 if(tokenizer.countTokens() >= 2) {
1164 String name_str = tokenizer.nextToken();
1165 String value_str = tokenizer.nextToken();
1166 if(name_str.equals(COLLECTIONMETADATA_BETA_STR)) {
1167 command_element = document.createElement(COLLECTIONMETADATA_BETA_ELEMENT);
1168 }
1169 else if(name_str.equals(COLLECTIONMETADATA_CREATOR_STR)) {
1170 command_element = document.createElement(COLLECTIONMETADATA_CREATOR_ELEMENT);
1171 }
1172 else if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
1173 command_element = document.createElement(COLLECTIONMETADATA_MAINTAINER_ELEMENT);
1174 }
1175 else if(name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
1176 command_element = document.createElement(COLLECTIONMETADATA_PUBLIC_ELEMENT);
1177 }
1178 if(command_element != null && value_str != null) {
1179 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1180 command_element.setAttribute(LANGUAGE_ATTRIBUTE, ENGLISH_LANGUAGE_STR);
1181 command_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
1182 if(value_str.startsWith(QUOTE_CHARACTER) && value_str.endsWith(QUOTE_CHARACTER)) {
1183 value_str = value_str.substring(1, value_str.length() - 1);
1184 }
1185 MSMUtils.setValue(command_element, value_str);
1186 }
1187 value_str = null;
1188 name_str = null;
1189 }
1190 tokenizer = null;
1191 }
1192 catch (Exception exception) {
1193 }
1194 return command_element;
1195 }
1196
1197 private Element parsePlugIn(String command_str) {
1198 Element command_element = null;
1199 boolean use_metadata_files = false;
1200 try {
1201 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1202 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1203 if(tokenizer.countTokens() >= 2) {
1204 command_element = document.createElement(PLUGIN_ELEMENT);
1205 // First token is plugin
1206 tokenizer.nextToken();
1207 // The next token is the type
1208 String type = tokenizer.nextToken();
1209 command_element.setAttribute(TYPE_ATTRIBUTE, type);
1210 // Now we parse out the remaining arguments into a hashmapping from name to value
1211 HashMap arguments = parseArguments(tokenizer);
1212 // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
1213 Iterator names = arguments.keySet().iterator();
1214 while(names.hasNext()) {
1215 String name = (String) names.next();
1216 String value = (String) arguments.get(name); // Can be null
1217 Element option_element = document.createElement(OPTION_ELEMENT);
1218 if(name.substring(1).equals(USE_METADATA_FILES_ARGUMENT)) {
1219 use_metadata_files = true;
1220 }
1221 option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
1222 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1223 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); // All arguments are considered to be custom until matched against base plugins arguments
1224 if(value != null) {
1225 // Remove any speech marks appended in strings containing whitespace
1226 if(value.startsWith(QUOTE_CHARACTER) && value.endsWith(QUOTE_CHARACTER)) {
1227 value = value.substring(1, value.length() - 1);
1228 }
1229 if(name.equals(METADATA_ARGUMENT)) {
1230 // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace.
1231 if(value.indexOf(MSMUtils.NS_SEP) == -1) {
1232 value = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + value;
1233 }
1234 }
1235 MSMUtils.setValue(option_element, value);
1236 }
1237 command_element.appendChild(option_element);
1238 option_element = null;
1239 name = null;
1240 value = null;
1241 }
1242 // Finally if the type happened to be a RecPlug we ensure that the use metadata files argument is set
1243 if(type.equals(RECPLUG_STR) && !use_metadata_files) {
1244 Element option_element = document.createElement(OPTION_ELEMENT);
1245 option_element.setAttribute(NAME_ATTRIBUTE, USE_METADATA_FILES_ARGUMENT);
1246 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1247 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
1248 command_element.appendChild(option_element);
1249 option_element = null;
1250 }
1251 type = null;
1252 names = null;
1253 arguments = null;
1254 }
1255 tokenizer = null;
1256 }
1257 catch(Exception exception) {
1258 }
1259 return command_element;
1260 }
1261
1262 private Element parseSearchType(String command_str) {
1263 Element command_element = null;
1264 try {
1265 StringTokenizer tokenizer = new StringTokenizer(command_str);
1266 // First token is command type (levels)
1267 tokenizer.nextToken();
1268 if(tokenizer.hasMoreTokens()) {
1269 command_element = document.createElement(SEARCHTYPE_ELEMENT);
1270 command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1271 while(tokenizer.hasMoreTokens()) {
1272 Element search_element = document.createElement(CONTENT_ELEMENT);
1273 search_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1274 command_element.appendChild(search_element);
1275 search_element = null;
1276 }
1277 }
1278 }
1279 catch(Exception exception) {
1280 }
1281 return command_element;
1282 }
1283
1284 private Element parseSubCollection(String command_str) {
1285 Element command_element = null;
1286 try {
1287 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1288 if(tokenizer.countTokens() >= 3) {
1289 command_element = document.createElement(SUBCOLLECTION_ELEMENT);
1290 // First token is command type
1291 tokenizer.nextToken();
1292 // Then subcollection identifier
1293 command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1294 // Then finally the pattern used to build the subcollection partition
1295 String full_pattern_str = tokenizer.nextToken();
1296 // To make life easier I'm going to parse this up now.
1297 boolean exclusion = (full_pattern_str.substring(1, 2).equals(EXCLAIMATION_CHARACTER));
1298 // Set inclusion/exclusion flag, remove any exclaimation mark and the speech marks
1299 if(exclusion) {
1300 full_pattern_str = full_pattern_str.substring(2, full_pattern_str.length() - 1);
1301 command_element.setAttribute(TYPE_ATTRIBUTE, EXCLUDE_STR);
1302 }
1303 else {
1304 full_pattern_str = full_pattern_str.substring(1, full_pattern_str.length() - 1);
1305 command_element.setAttribute(TYPE_ATTRIBUTE, INCLUDE_STR);
1306 }
1307 StringTokenizer pattern_tokenizer = new StringTokenizer(full_pattern_str, SEPARATOR_CHARACTER);
1308 if(pattern_tokenizer.countTokens() >= 2) {
1309 String content_str = pattern_tokenizer.nextToken();
1310 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1311 if(!content_str.equals(StaticStrings.FILENAME_STR) && content_str.indexOf(MSMUtils.NS_SEP) == -1) {
1312 content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str;
1313 }
1314 command_element.setAttribute(CONTENT_ATTRIBUTE, content_str);
1315 MSMUtils.setValue(command_element, pattern_tokenizer.nextToken());
1316 if(pattern_tokenizer.hasMoreTokens()) {
1317 command_element.setAttribute(OPTIONS_ATTRIBUTE, pattern_tokenizer.nextToken());
1318 }
1319 }
1320 pattern_tokenizer = null;
1321 }
1322 }
1323 catch(Exception exception) {
1324 exception.printStackTrace();
1325 }
1326 return command_element;
1327 }
1328
1329 private Element parseSubCollectionDefaultIndex(String command_str) {
1330 Element command_element = null;
1331 try {
1332 StringTokenizer tokenizer = new StringTokenizer(command_str);
1333 if(tokenizer.countTokens() == 2) {
1334 command_element = document.createElement(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT);
1335 tokenizer.nextToken();
1336 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1337 String content_str = tokenizer.nextToken();
1338 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1339 while(content_tokenizer.hasMoreTokens()) {
1340 Element content_element = document.createElement(CONTENT_ELEMENT);
1341 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1342 command_element.appendChild(content_element);
1343 content_element = null;
1344 }
1345 content_tokenizer = null;
1346 content_str = null;
1347 }
1348 tokenizer = null;
1349 }
1350 catch(Exception exception) {
1351 }
1352 return command_element;
1353 }
1354
1355 private Element parseSubCollectionIndex(String command_str) {
1356 Element command_element = null;
1357 try {
1358 StringTokenizer tokenizer = new StringTokenizer(command_str);
1359 tokenizer.nextToken();
1360 if(tokenizer.hasMoreTokens()) {
1361 command_element = document.createElement(SUBCOLLECTION_INDEXES_ELEMENT);
1362 }
1363 while(tokenizer.hasMoreTokens()) {
1364 Element subcollectionindex_element = document.createElement(INDEX_ELEMENT);
1365 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1366 String content_str = tokenizer.nextToken();
1367 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1368 while(content_tokenizer.hasMoreTokens()) {
1369 Element content_element = document.createElement(CONTENT_ELEMENT);
1370 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1371 subcollectionindex_element.appendChild(content_element);
1372 content_element = null;
1373 }
1374 content_tokenizer = null;
1375 content_str = null;
1376 command_element.appendChild(subcollectionindex_element);
1377 subcollectionindex_element = null;
1378 }
1379 tokenizer = null;
1380 }
1381 catch (Exception exception) {
1382 }
1383 return command_element;
1384 }
1385
1386 private Element parseSuperCollection(String command_str) {
1387 Element command_element = null;
1388 try {
1389 StringTokenizer tokenizer = new StringTokenizer(command_str);
1390 if(tokenizer.countTokens() >= 3) {
1391 command_element = document.createElement(SUPERCOLLECTION_ELEMENT);
1392 tokenizer.nextToken();
1393 while(tokenizer.hasMoreTokens()) {
1394 Element collection_element = document.createElement(COLLECTION_ELEMENT);
1395 collection_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1396 command_element.appendChild(collection_element);
1397 collection_element = null;
1398 }
1399 }
1400 tokenizer = null;
1401 }
1402 catch(Exception exception) {
1403 }
1404 return command_element;
1405 }
1406
1407 private String pluginToString(Element command_element, boolean show_extracted_namespace) {
1408 StringBuffer text = new StringBuffer();
1409 if(!command_element.getAttribute(SEPARATOR_ATTRIBUTE).equals(TRUE_STR)) {
1410 text.append(PLUGIN_STR);
1411 text.append(TAB_CHARACTER);
1412 text.append(TAB_CHARACTER);
1413 text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
1414 // Retrieve, and output, the arguments
1415 NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
1416 int option_elements_length = option_elements.getLength();
1417 if(option_elements_length > 0) {
1418 text.append(SPACE_CHARACTER);
1419 for(int j = 0; j < option_elements_length; j++) {
1420 Element option_element = (Element) option_elements.item(j);
1421 if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1422 text.append(StaticStrings.MINUS_CHARACTER);
1423 text.append(option_element.getAttribute(NAME_ATTRIBUTE));
1424 String value_str = MSMUtils.getValue(option_element);
1425 if(!show_extracted_namespace && value_str.startsWith(EXTRACTED_PREFIX)) {
1426 value_str = value_str.substring(EXTRACTED_PREFIX.length());
1427 }
1428 if(value_str.length() > 0) {
1429 text.append(SPACE_CHARACTER);
1430 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
1431 text.append(value_str);
1432 }
1433 else {
1434 text.append(QUOTE_CHARACTER);
1435 text.append(value_str);
1436 text.append(QUOTE_CHARACTER);
1437 }
1438 }
1439 value_str = null;
1440 if(j < option_elements_length - 1) {
1441 text.append(SPACE_CHARACTER);
1442 }
1443 }
1444 option_element = null;
1445 }
1446 }
1447 option_elements = null;
1448 }
1449 return text.toString();
1450 }
1451
1452 private String searchtypeToString(Element command_element) {
1453 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1454 StringBuffer text = new StringBuffer(SEARCHTYPE_STR);
1455 text.append(TAB_CHARACTER);
1456 NodeList search_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1457 int search_elements_length = search_elements.getLength();
1458 for(int i = 0; i < search_elements_length; i++) {
1459 Element search_element = (Element) search_elements.item(i);
1460 text.append(search_element.getAttribute(NAME_ATTRIBUTE));
1461 text.append(SPACE_CHARACTER);
1462 }
1463 return text.substring(0, text.length() - 1);
1464 }
1465 else {
1466 return null;
1467 }
1468 }
1469
1470 private String subcollectionToString(Element command_element, boolean show_extracted_namespace) {
1471 StringBuffer text = new StringBuffer(SUBCOLLECTION_STR);
1472 text.append(SPACE_CHARACTER);
1473 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
1474 text.append(SPACE_CHARACTER);
1475 text.append(TAB_CHARACTER);
1476 text.append(QUOTE_CHARACTER);
1477 if(command_element.getAttribute(TYPE_ATTRIBUTE).equals(EXCLUDE_STR)) {
1478 text.append(EXCLAIMATION_CHARACTER);
1479 }
1480 String content_str = command_element.getAttribute(CONTENT_ATTRIBUTE);
1481 if(!show_extracted_namespace && content_str.startsWith(EXTRACTED_PREFIX)) {
1482 content_str = content_str.substring(EXTRACTED_PREFIX.length());
1483 }
1484 text.append(content_str);
1485 content_str = null;
1486 text.append(SEPARATOR_CHARACTER);
1487 text.append(MSMUtils.getValue(command_element));
1488 String options_str = command_element.getAttribute(OPTIONS_ATTRIBUTE);
1489 if(options_str.length() > 0) {
1490 text.append(SEPARATOR_CHARACTER);
1491 text.append(options_str);
1492 }
1493 options_str = null;
1494 text.append(QUOTE_CHARACTER);
1495 return text.toString();
1496 }
1497
1498 private String subcollectionDefaultIndexToString(Element command_element) {
1499 StringBuffer text = new StringBuffer(SUBCOLLECTION_DEFAULT_INDEX_STR);
1500 text.append(TAB_CHARACTER);
1501 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1502 int content_elements_length = content_elements.getLength();
1503 for(int j = 0; j < content_elements_length; j++) {
1504 Element content_element = (Element) content_elements.item(j);
1505 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1506 if(j < content_elements_length - 1) {
1507 text.append(StaticStrings.COMMA_CHARACTER);
1508 }
1509 }
1510 return text.toString();
1511 }
1512
1513 private String subcollectionIndexesToString(Element command_element) {
1514 StringBuffer text = new StringBuffer(SUBCOLLECTION_INDEX_STR);
1515 text.append(TAB_CHARACTER);
1516 // Retrieve all of the subcollection index partitions
1517 NodeList subcollectionindex_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
1518 int subcollectionindex_elements_length = subcollectionindex_elements.getLength();
1519 if(subcollectionindex_elements_length == 0) {
1520 return null;
1521 }
1522 for(int j = 0; j < subcollectionindex_elements_length; j++) {
1523 Element subcollectionindex_element = (Element) subcollectionindex_elements.item(j);
1524 NodeList content_elements = subcollectionindex_element.getElementsByTagName(CONTENT_ELEMENT);
1525 int content_elements_length = content_elements.getLength();
1526 for(int k = 0; k < content_elements_length; k++) {
1527 Element content_element = (Element) content_elements.item(k);
1528 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1529 if(k < content_elements_length - 1) {
1530 text.append(StaticStrings.COMMA_CHARACTER);
1531 }
1532 }
1533 if(j < subcollectionindex_elements_length - 1) {
1534 text.append(SPACE_CHARACTER);
1535 }
1536 }
1537 return text.toString();
1538 }
1539
1540 private String supercollectionToString(Element command_element) {
1541 NodeList content_elements = command_element.getElementsByTagName(COLLECTION_ELEMENT);
1542 int content_elements_length = content_elements.getLength();
1543 if(content_elements_length > 1) {
1544 StringBuffer text = new StringBuffer(SUPERCOLLECTION_STR);
1545 text.append(TAB_CHARACTER);
1546 for(int j = 0; j < content_elements_length; j++) {
1547 Element content_element = (Element) content_elements.item(j);
1548 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1549 if(j < content_elements_length - 1) {
1550 text.append(SPACE_CHARACTER);
1551 }
1552 }
1553 return text.toString();
1554 }
1555 return null;
1556 }
1557
1558 private String unknownToString(Element command_element) {
1559 return MSMUtils.getValue(command_element);
1560 }
1561
1562 /** Write the text to the buffer. This is used so we don't have to worry about storing intermediate String values just so we can calaulate length and offset.
1563 * @param writer the BufferedWriter to which the str will be written
1564 * @param str the String to be written
1565 */
1566 private void write(BufferedWriter writer, String str)
1567 throws IOException {
1568 writer.write(str, 0, str.length());
1569 }
1570}
Note: See TracBrowser for help on using the repository browser.