source: trunk/gli/src/org/greenstone/gatherer/cdm/CollectionConfiguration.java@ 5164

Last change on this file since 5164 was 5164, checked in by jmt12, 21 years ago

I can't remember what has changed, but I bet it was for the better

  • Property svn:keywords set to Author Date Id Revision
File size: 64.6 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.cdm;
28import java.awt.*;
29import java.awt.event.*;
30import java.io.*;
31import java.util.*;
32import javax.swing.*;
33import org.greenstone.gatherer.Gatherer;
34import org.greenstone.gatherer.cdm.CommandTokenizer;
35import org.greenstone.gatherer.msm.MSMUtils;
36import org.greenstone.gatherer.util.DOMTree;
37import org.greenstone.gatherer.util.StaticStrings;
38import org.greenstone.gatherer.util.Utility;
39import org.w3c.dom.*;
40/** This class provides either access to a pseudo-G3 document, or parses a collect.cfg file in such a way as to provide an xml-type view of its content. This latter version is useful as it allows the manipulation and free form editing of a legacy collect.cfg file while still allowing the various CDM data managers to base themselves directly on this model (whereas they used to be independent ListModels which clobbered the ordering of unparsed commands).
41 * @author John Thompson, Greenstone Digital Library, University of Waikato
42 * @version 2.3d
43 */
44public class CollectionConfiguration
45 extends StaticStrings {
46
47 static public Document document;
48
49 static public void main(String[] args) {
50 if(args.length >= 1) {
51 File file = new File(args[0]);
52 CollectionConfiguration collect_cfg = new CollectionConfiguration(file);
53 collect_cfg.save(true);
54 collect_cfg.save(false);
55 collect_cfg = null;
56 }
57 else {
58 System.out.println("Usage: CollectionConfiguration <filename>");
59 }
60 }
61
62 /** Find the best insertion position for the given DOM Element. This should try to match command tag, and if found should then try to group by name or type (eg CollectionMeta), or append to end is no such grouping exists (eg PlugIns). Failing a command match it will check against the command order for the best insertion location.
63 * @param element the command Element to be inserted
64 * @return the Element which the given command should be inserted before, or null to append to end of list
65 */
66 static public Node findInsertionPoint(Element target_element) {
67 ///ystem.err.println("Find insertion point: " + target_element.getNodeName());
68 String target_element_name = target_element.getNodeName();
69 Element document_element = document.getDocumentElement();
70 // Try to find commands with the same tag.
71 NodeList matching_elements = document_element.getElementsByTagName(target_element_name);
72 // If we found matching elements, then we have our most likely insertion location, so check within for groupings
73 if(matching_elements.getLength() != 0) {
74 ///ystem.err.println("Found matching elements.");
75 // Only CollectionMeta are grouped.
76 if(target_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
77 ///ystem.err.println("Dealing with collection metadata");
78 // Special case: CollectionMeta can be added at either the start or end of a collection configuration file. However the start position is reserved for special metadata, so if no non-special metadata can be found we must append to the end.
79 // So if the command to be added is special add it immediately after any other special command
80 if(target_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
81 int index = 0;
82 Element matched_element = (Element) matching_elements.item(index);
83 Element sibling_element = (Element) matched_element.getNextSibling();
84 while(sibling_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
85 index++;
86 matched_element = (Element) matching_elements.item(index);
87 sibling_element = (Element) matched_element.getNextSibling();
88 }
89 if(sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
90 Element newline_element = document.createElement(NEWLINE_ELEMENT);
91 document_element.insertBefore(newline_element, sibling_element);
92 }
93 return sibling_element;
94 }
95 // Otherwise try to find a matching 'name' and add after the last one in that group.
96 else {
97 int index = 0;
98 target_element_name = target_element.getAttribute(NAME_ATTRIBUTE);
99 boolean found = false;
100 // Skip all of the special metadata
101 Element matched_element = (Element) matching_elements.item(index);
102 while(matched_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
103 index++;
104 matched_element = (Element) matching_elements.item(index);
105 }
106 // Begin search
107 while(!found && matched_element != null) {
108 if(matched_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
109 found = true;
110 }
111 else {
112 index++;
113 matched_element = (Element) matching_elements.item(index);
114 }
115 }
116 // If we found a match, we need to continue checking until we find the last name match.
117 if(found) {
118 index++;
119 Element previous_sibling = matched_element;
120 Element sibling_element = (Element) matching_elements.item(index);
121 while(sibling_element != null && sibling_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
122 previous_sibling = sibling_element;
123 index++;
124 sibling_element = (Element) matching_elements.item(index);
125 }
126 // Previous sibling now holds the command immediately before where we want to add, so find its next sibling and add to that. In this one case we can ignore new lines!
127 return previous_sibling.getNextSibling();
128 }
129 // If not found we just add after last metadata element
130 else {
131 Element last_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
132 return last_element.getNextSibling();
133 }
134 }
135
136 }
137 else {
138 ///ystem.err.println("Not dealing with collection meta.");
139 Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
140 // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
141 Node sibling_element = matched_element.getNextSibling();
142 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
143 Element newline_element = document.createElement(NEWLINE_ELEMENT);
144 document_element.insertBefore(newline_element, sibling_element);
145 }
146 return sibling_element; // Note that this may be null
147 }
148 }
149 ///ystem.err.println("No matching elements found.");
150 // Locate where this command is in the ordering
151 int command_index = -1;
152 for(int i = 0; command_index == -1 && i < COMMAND_ORDER.length; i++) {
153 if(COMMAND_ORDER[i].equals(target_element_name)) {
154 command_index = i;
155 }
156 }
157 ///ystem.err.println("Command index is: " + command_index);
158 // Now move forward, checking for existing elements in each of the preceeding command orders.
159 int preceeding_index = command_index - 1;
160 ///ystem.err.println("Searching before the target command.");
161 while(preceeding_index >= 0) {
162 matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[preceeding_index]);
163 // If we've found a match
164 if(matching_elements.getLength() > 0) {
165 // We add after the last element
166 Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
167 // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
168 Node sibling_element = matched_element.getNextSibling();
169 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
170 Element newline_element = document.createElement(NEWLINE_ELEMENT);
171 document_element.insertBefore(newline_element, sibling_element);
172 }
173 return sibling_element; // Note that this may be null
174 }
175 preceeding_index--;
176 }
177 // If all that fails, we now move backwards through the commands
178 int susceeding_index = command_index + 1;
179 ///ystem.err.println("Searching after the target command.");
180 while(susceeding_index < COMMAND_ORDER.length) {
181 matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[susceeding_index]);
182 // If we've found a match
183 if(matching_elements.getLength() > 0) {
184 // We add before the first element
185 Element matched_element = (Element) matching_elements.item(0);
186 // One final quick test. If the matched element is immediately preceeded by a NewLine command, then we insert another NewLine before the matched command, then return this new NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
187 Node sibling_element = matched_element.getPreviousSibling();
188 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
189 Element newline_element = document.createElement(NEWLINE_ELEMENT);
190 document_element.insertBefore(newline_element, sibling_element);
191 }
192 return sibling_element; // Note that this may be null
193 }
194 susceeding_index++;
195 }
196 // Well. Apparently there are no other commands in this collection configuration. So append away...
197 return null;
198 }
199
200 static public String toString(Element command_element, boolean show_extracted_namespace) {
201 String command_element_name = command_element.getNodeName();
202 if(command_element_name.equals(CLASSIFY_ELEMENT)) {
203 return self.classifyToString(command_element, show_extracted_namespace);
204 }
205 else if(command_element_name.equals(FORMAT_ELEMENT)) {
206 return self.formatToString(command_element, show_extracted_namespace);
207 }
208 else if(command_element_name.equals(INDEXES_ELEMENT)) {
209 return self.indexesToString(command_element, show_extracted_namespace);
210 }
211 else if(command_element_name.equals(INDEX_DEFAULT_ELEMENT)) {
212 return self.indexDefaultToString(command_element, show_extracted_namespace);
213 }
214 else if(command_element_name.equals(LANGUAGES_ELEMENT)) {
215 return self.languagesToString(command_element);
216 }
217 else if(command_element_name.equals(LANGUAGE_DEFAULT_ELEMENT)) {
218 return self.languageDefaultToString(command_element);
219 }
220 else if(command_element_name.equals(LEVELS_ELEMENT)) {
221 return self.levelsToString(command_element);
222 }
223 else if(command_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
224 return self.metadataToString(command_element);
225 }
226 else if(command_element_name.equals(COLLECTIONMETADATA_CREATOR_ELEMENT)) {
227 return self.metadataToString(command_element);
228 }
229 else if(command_element_name.equals(COLLECTIONMETADATA_MAINTAINER_ELEMENT)) {
230 return self.metadataToString(command_element);
231 }
232 else if(command_element_name.equals(COLLECTIONMETADATA_PUBLIC_ELEMENT)) {
233 return self.metadataToString(command_element);
234 }
235 else if(command_element_name.equals(COLLECTIONMETADATA_BETA_ELEMENT)) {
236 return self.metadataToString(command_element);
237 }
238 else if(command_element_name.equals(PLUGIN_ELEMENT)) {
239 return self.pluginToString(command_element, show_extracted_namespace);
240 }
241 else if(command_element_name.equals(SEARCHTYPE_ELEMENT)) {
242 return self.searchtypeToString(command_element);
243 }
244 else if(command_element_name.equals(SUBCOLLECTION_ELEMENT)) {
245 return self.subcollectionToString(command_element, show_extracted_namespace);
246 }
247 else if(command_element_name.equals(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) {
248 return self.subcollectionDefaultIndexToString(command_element);
249 }
250 else if(command_element_name.equals(SUBCOLLECTION_INDEXES_ELEMENT)) {
251 return self.subcollectionIndexesToString(command_element);
252 }
253 else if(command_element_name.equals(SUPERCOLLECTION_ELEMENT)) {
254 return self.supercollectionToString(command_element);
255 }
256 else if(command_element_name.equals(UNKNOWN_ELEMENT)) {
257 return self.unknownToString(command_element);
258 }
259 return "";
260 }
261
262 /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix.
263 * @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string
264 * @return a HashMap containing the arguments parsed
265 */
266 static public HashMap parseArguments(CommandTokenizer tokenizer) {
267 HashMap arguments = new HashMap();
268 String name = null;
269 String value = null;
270 while(tokenizer.hasMoreTokens() || name != null) {
271 // First we retrieve a name if we need one.
272 if(name == null) {
273 name = tokenizer.nextToken();
274 }
275 // Now we attempt to retrieve a value
276 if(tokenizer.hasMoreTokens()) {
277 value = tokenizer.nextToken();
278 // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop.
279 if(value.startsWith(StaticStrings.MINUS_CHARACTER)) {
280 arguments.put(name, null);
281 name = value;
282 }
283 // Otherwise we have a typical name->value pair ready to go
284 else {
285 arguments.put(name, value);
286 name = null;
287 }
288 }
289 // Otherwise its a binary flag
290 else {
291 arguments.put(name, null);
292 name = null;
293 }
294 }
295 return arguments;
296 }
297
298 static private ArrayList known_metadata;
299
300 static private CollectionConfiguration self;
301
302 static final private String EXTRACTED_PREFIX = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP;
303 /** Gives the preferred ordering of commands */
304 static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, StaticStrings.COLLECTIONMETADATA_BETA_ELEMENT, StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};
305
306 /** ************************** Public Data Members ***************************/
307
308 /** ************************** Private Data Members ***************************/
309
310 private File collect_config_file;
311
312 /** ************************** Public Methods ***************************/
313
314 public CollectionConfiguration(File collect_config_file) {
315 this.self = this;
316 this.collect_config_file = collect_config_file;
317 // If collect_cfg is xml we can load it straight away
318 String collect_config_name = collect_config_file.getName();
319 if(collect_config_name.equals(COLLECTCONFIGURATION_XML)) {
320 // Parse with Utility but don't use class loader
321 document = Utility.parse(collect_config_file, false);
322 }
323 // Otherwise if this is a legacy collect.cfg file then read in the template and send to magic parser
324 else if(collect_config_name.equals(COLLECT_CFG)) {
325 document = Utility.parse(PSEUDO_COLLECTCONFIGURATION_XML, true);
326 parse(collect_config_file);
327 }
328 }
329
330 /** This debug facility shows the currently loaded collect.cfg or CollectConfig.xml file as a DOM tree. */
331 public void display() {
332 JDialog dialog = new JDialog(Gatherer.g_man, "Collection Configuration", false);
333 dialog.setSize(400,400);
334 JPanel content_pane = (JPanel) dialog.getContentPane();
335 final DOMTree tree = new DOMTree(document);
336 JButton refresh_button = new JButton("Refresh Tree");
337 refresh_button.addActionListener(new ActionListener() {
338 public void actionPerformed(ActionEvent event) {
339 tree.setDocument(document);
340 }
341 });
342 content_pane.setBorder(BorderFactory.createEmptyBorder(5,5,5,5));
343 content_pane.setLayout(new BorderLayout());
344 content_pane.add(new JScrollPane(tree), BorderLayout.CENTER);
345 content_pane.add(refresh_button, BorderLayout.SOUTH);
346 dialog.show();
347 }
348
349 public Element getBeta() {
350 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_BETA_ELEMENT, null, null);
351 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_BETA_STR);
352 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
353 return element;
354 }
355
356 public Element getCreator() {
357 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_CREATOR_ELEMENT, null, null);
358 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_CREATOR_STR);
359 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
360 return element;
361 }
362
363 public Element getDocumentElement() {
364 return document.getDocumentElement();
365 }
366
367 public File getFile() {
368 return collect_config_file;
369 }
370
371 /** Retrieve or create the languages Element. */
372 public Element getLanguages() {
373 return getOrCreateElementByTagName(LANGUAGES_ELEMENT, null, null);
374 }
375
376 public Element getLevels() {
377 return getOrCreateElementByTagName(LEVELS_ELEMENT, null, null);
378 }
379
380 public Element getMaintainer() {
381 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_MAINTAINER_ELEMENT, null, null);
382 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_MAINTAINER_STR);
383 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
384 return element;
385 }
386
387 /** Retrieve or create the indexes Element. Note that this method behaves differently from the other getBlah methods, in that it also has to keep in mind that indexes come in two flavours, MG and MGPP. */
388 public Element getMGIndexes() {
389 return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, FALSE_STR);
390 }
391
392 public Element getMGPPIndexes() {
393 return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, TRUE_STR);
394 }
395
396 public Element getPublic() {
397 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_PUBLIC_ELEMENT, null, null);
398 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_PUBLIC_STR);
399 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
400 return element;
401 }
402
403 /** Retrieve or create the searchtype element. */
404 public Element getSearchType() {
405 ///ystem.err.println("Get or create element by tag name: " + name);
406 Element document_element = document.getDocumentElement();
407 NodeList elements = document_element.getElementsByTagName(SEARCHTYPE_ELEMENT);
408 int elements_length = elements.getLength();
409 if(elements_length > 0) {
410 document_element = null;
411 return (Element) elements.item(0);
412 }
413 // Create the element
414 Element element = document.createElement(SEARCHTYPE_ELEMENT);
415 Node target_node = findInsertionPoint(element);
416 if(target_node != null) {
417 document_element.insertBefore(element, target_node);
418 }
419 else {
420 document_element.appendChild(element);
421 }
422 document_element = null;
423 // Append a default search type node - form
424 Element a_searchtype_element = CollectionDesignManager.collect_config.document.createElement(CollectionConfiguration.CONTENT_ELEMENT);
425 a_searchtype_element.setAttribute(CollectionConfiguration.NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]);
426 element.appendChild(a_searchtype_element);
427 return element;
428 }
429
430 /** Retrieve or create the subindexes Element. */
431 public Element getSubIndexes() {
432 return getOrCreateElementByTagName(SUBCOLLECTION_INDEXES_ELEMENT, null, null);
433 }
434
435 /** Retrieve or create the supercollections Element. */
436 public Element getSuperCollection() {
437 return getOrCreateElementByTagName(SUPERCOLLECTION_ELEMENT, null, null);
438 }
439
440 public void save() {
441 save(false);
442 }
443
444 public void save(boolean force_xml) {
445 if(collect_config_file.exists()) {
446 File original_file = new File(collect_config_file.getParentFile(), COLLECT_CFG);
447 File backup_file = new File(collect_config_file.getParentFile(), "collect.bak");
448 if(backup_file.exists()) {
449 backup_file.delete();
450 }
451 if(!original_file.renameTo(backup_file)) {
452 Gatherer.println("Can't rename collect.cfg");
453 }
454 }
455 if(force_xml || collect_config_file.getName().equals(COLLECTCONFIGURATION_XML)) {
456 ///ystem.err.println("Writing XML");
457 Utility.export(document, new File(collect_config_file.getParentFile(), COLLECTCONFIGURATION_XML));
458 }
459 else {
460 ///ystem.err.println("Writing text");
461 try {
462 FileWriter file_writer = new FileWriter(collect_config_file, false);
463 BufferedWriter buffered_writer = new BufferedWriter(file_writer);
464 // In order to write out an old style collect.cfg we have to traverse the model and do several 'cute' tricks to ensure the collect.cfg is valid (for instance while every metadata element has a language attribute, only second or subsequent metadata, for a certain name, needs a language argument - hence the known metadata array. Note that within GLI the language will always be shown, but it doesn't crash and burn like G2 does, te-he).
465 known_metadata = new ArrayList();
466 Element collect_config_element = document.getDocumentElement();
467 NodeList command_elements = collect_config_element.getChildNodes();
468 boolean just_wrote_newline = false; // Prevent two or more newlines in a row
469 for(int i = 0; i < command_elements.getLength(); i++) {
470 Node command_node = command_elements.item(i);
471 if(command_node instanceof Element) {
472 Element command_element = (Element) command_node;
473 // The only thing left are NewLine elements
474 if(command_element.getNodeName().equals(NEWLINE_ELEMENT) && !just_wrote_newline) {
475 buffered_writer.newLine();
476 just_wrote_newline = true;
477 }
478 // Anything else we write to file, but only if it has been assigned, the exception being the Indexes element which just get commented if unassigned (a side effect of MG && MGPP compatibility)
479 else if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR) || command_element.getNodeName().equals(INDEXES_ELEMENT) || command_element.getNodeName().equals(INDEX_DEFAULT_ELEMENT) || command_element.getNodeName().equals(LEVELS_ELEMENT)){
480 String command = toString(command_element, false);
481 if(command != null && command.length() > 0) {
482 write(buffered_writer, command);
483 buffered_writer.newLine();
484 just_wrote_newline = false;
485 }
486 }
487 }
488 }
489 buffered_writer.close();
490 known_metadata = null;
491 }
492 catch (Exception exception) {
493 Gatherer.println("Error in CollectionConfiguration.save(boolean): " + exception);
494 Gatherer.printStackTrace(exception);
495 }
496 }
497 }
498
499 /** ************************** Private Methods ***************************/
500
501 private String classifyToString(Element command_element, boolean show_extracted_namespace) {
502 StringBuffer text = new StringBuffer(CLASSIFY_STR);
503 text.append(TAB_CHARACTER);
504 text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
505 text.append(SPACE_CHARACTER);
506 NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
507 int option_elements_length = option_elements.getLength();
508 for(int j = 0; j < option_elements_length; j++) {
509 Element option_element = (Element) option_elements.item(j);
510 if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
511 text.append(StaticStrings.MINUS_CHARACTER);
512 text.append(option_element.getAttribute(NAME_ATTRIBUTE));
513 String value_str = MSMUtils.getValue(option_element);
514 if(value_str.length() > 0) {
515 // If the value happens to be the identifier of an extracted metadata element, then remove the namespace.
516 if(!show_extracted_namespace && value_str.startsWith(EXTRACTED_PREFIX)) {
517 value_str = value_str.substring(EXTRACTED_PREFIX.length());
518 }
519 text.append(SPACE_CHARACTER);
520 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
521 text.append(value_str);
522 }
523 else {
524 text.append(QUOTE_CHARACTER);
525 text.append(value_str);
526 text.append(QUOTE_CHARACTER);
527 }
528 }
529 value_str = null;
530 if(j < option_elements_length - 1) {
531 text.append(SPACE_CHARACTER);
532 }
533 }
534 option_element = null;
535 }
536 option_elements = null;
537 return text.toString();
538 }
539
540 private String formatToString(Element command_element, boolean show_extracted_namespace) {
541 StringBuffer text = new StringBuffer(FORMAT_STR);
542 text.append(SPACE_CHARACTER);
543 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
544 text.append(SPACE_CHARACTER);
545 String value_str = command_element.getAttribute(VALUE_ATTRIBUTE);
546 if(value_str.length() != 0) {
547 text.append(value_str);
548 }
549 else {
550 value_str = MSMUtils.getValue(command_element);
551 // Remove any references to a namespace for extracted metadata
552 if(!show_extracted_namespace) {
553 value_str.replaceAll(EXTRACTED_PREFIX, "");
554 }
555 text.append(QUOTE_CHARACTER);
556 text.append(value_str);
557 text.append(QUOTE_CHARACTER);
558 }
559 value_str = null;
560 return text.toString();
561 }
562
563 /** Retrieve or create the indexes Element. */
564 private Element getOrCreateElementByTagName(String name, String conditional_attribute, String required_value) {
565 ///ystem.err.println("Get or create element by tag name: " + name);
566 Element document_element = document.getDocumentElement();
567 NodeList elements = document_element.getElementsByTagName(name);
568 int elements_length = elements.getLength();
569 if(elements_length > 0) {
570 if(conditional_attribute == null) {
571 document_element = null;
572 return (Element) elements.item(0);
573 }
574 else {
575 for(int i = 0; i < elements_length; i++) {
576 Element element = (Element) elements.item(i);
577 if(element.getAttribute(conditional_attribute).equals(required_value)) {
578 document_element = null;
579 return element;
580 }
581 element = null;
582 }
583 }
584 }
585 // Create the element
586 Element element = document.createElement(name);
587 // If there was a property set it
588 if(conditional_attribute != null) {
589 element.setAttribute(conditional_attribute, required_value);
590 }
591 Node target_node = findInsertionPoint(element);
592 if(target_node != null) {
593 document_element.insertBefore(element, target_node);
594 }
595 else {
596 document_element.appendChild(element);
597 }
598 document_element = null;
599 return element;
600 }
601
602 private String indexesToString(Element command_element, boolean show_extracted_namespace) {
603 boolean comment_only = false;
604 StringBuffer text = new StringBuffer("");
605 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
606 text.append("#");
607 comment_only = true;
608 }
609 text.append(INDEX_STR);
610 text.append(TAB_CHARACTER);
611 if(!comment_only) {
612 text.append(TAB_CHARACTER);
613 }
614 NodeList index_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
615 // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
616 int index_elements_length = index_elements.getLength();
617 for(int j = 0; j < index_elements_length; j++) {
618 Element index_element = (Element) index_elements.item(j);
619 String level_str = index_element.getAttribute(LEVEL_ATTRIBUTE);
620 if(level_str.length() > 0) {
621 text.append(level_str);
622 text.append(StaticStrings.COLON_CHARACTER);
623 }
624 NodeList content_elements = index_element.getElementsByTagName(CONTENT_ELEMENT);
625 int content_elements_length = content_elements.getLength();
626 // Don't output anything if no indexes are set
627 if(content_elements_length == 0) {
628 return null;
629 }
630 for(int k = 0; k < content_elements_length; k++) {
631 Element content_element = (Element) content_elements.item(k);
632 String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
633 if(!show_extracted_namespace && name_str.startsWith(EXTRACTED_PREFIX)) {
634 name_str = name_str.substring(EXTRACTED_PREFIX.length());
635 }
636 text.append(name_str);
637 name_str = null;
638 if(k < content_elements_length - 1) {
639 text.append(StaticStrings.COMMA_CHARACTER);
640 }
641 content_element = null;
642 }
643 if(j < index_elements_length - 1) {
644 text.append(SPACE_CHARACTER);
645 }
646 content_elements = null;
647 index_element = null;
648 }
649 index_elements = null;
650 return text.toString();
651 }
652
653 private String indexDefaultToString(Element command_element, boolean show_extracted_namespace) {
654 StringBuffer text = new StringBuffer("");
655 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
656 text.append("#");
657 }
658 text.append(INDEX_DEFAULT_STR);
659 text.append(TAB_CHARACTER);
660 text.append(command_element.getAttribute(LEVEL_ATTRIBUTE));
661 text.append(StaticStrings.COLON_CHARACTER);
662 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
663 int content_elements_length = content_elements.getLength();
664 for(int j = 0; j < content_elements_length; j++) {
665 Element content_element = (Element) content_elements.item(j);
666 String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
667 if(!show_extracted_namespace && name_str.startsWith(EXTRACTED_PREFIX)) {
668 name_str = name_str.substring(EXTRACTED_PREFIX.length());
669 }
670 text.append(name_str);
671 name_str = null;
672 if(j < content_elements_length - 1) {
673 text.append(StaticStrings.COMMA_CHARACTER);
674 }
675 content_element = null;
676 }
677 content_elements = null;
678 return text.toString();
679 }
680
681 private String languagesToString(Element command_element) {
682 StringBuffer text = new StringBuffer(LANGUAGES_STR);
683 text.append(TAB_CHARACTER);
684 // Retrieve all the languages and write them out in a space separated list
685 NodeList language_elements = command_element.getElementsByTagName(LANGUAGE_ELEMENT);
686 int language_elements_length = language_elements.getLength();
687 if(language_elements_length == 0) {
688 return null;
689 }
690 for(int j = 0; j < language_elements_length; j++) {
691 Element language_element = (Element) language_elements.item(j);
692 text.append(language_element.getAttribute(NAME_ATTRIBUTE));
693 if(j < language_elements_length - 1) {
694 text.append(SPACE_CHARACTER);
695 }
696 }
697 return text.toString();
698 }
699
700 private String languageDefaultToString(Element command_element) {
701 StringBuffer text = new StringBuffer(LANGUAGE_DEFAULT_STR);
702 text.append(TAB_CHARACTER);
703 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
704 return text.toString();
705 }
706
707 private String levelsToString(Element command_element) {
708 StringBuffer text = new StringBuffer("");
709 if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
710 text.append("#");
711 }
712 text.append(LEVELS_STR);
713 text.append(TAB_CHARACTER);
714 text.append(TAB_CHARACTER);
715 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
716 int content_elements_length = content_elements.getLength();
717 // Don't output anything if no levels are set.
718 if(content_elements_length == 0) {
719 return null;
720 }
721 for(int i = 0; i < content_elements_length; i++) {
722 Element content_element = (Element) content_elements.item(i);
723 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
724 text.append(SPACE_CHARACTER);
725 }
726 return text.substring(0, text.length() - 1);
727 }
728
729 static public String metadataToString(Element command_element) {
730 // If there is no value attribute, then we don't write anything
731 String value_str = MSMUtils.getValue(command_element);
732 if(value_str == null || value_str.length() == 0) {
733 return "";
734 }
735 else {
736 StringBuffer text = new StringBuffer("");
737 String name_str = command_element.getAttribute(NAME_ATTRIBUTE);
738 // If the name is one of the special four, we don't write the collectionmeta first. Note the maintainer collectionmeta is singled out for 'prittying' reasons.
739 if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
740 text.append(name_str);
741 text.append(TAB_CHARACTER);
742 }
743 else if(name_str.equals(COLLECTIONMETADATA_BETA_STR) || name_str.equals(COLLECTIONMETADATA_CREATOR_STR) || name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
744 text.append(name_str);
745 text.append(TAB_CHARACTER);
746 text.append(TAB_CHARACTER);
747 }
748 else {
749 text.append(COLLECTIONMETADATA_STR);
750 text.append(TAB_CHARACTER);
751 text.append(name_str);
752 text.append(SPACE_CHARACTER);
753 String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE);
754 // If this is element is in english, and it is the first one found, we don't need to write the language argument.
755 if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) {
756 text.append(LBRACKET_CHARACTER);
757 text.append(LANGUAGE_ARGUMENT);
758 text.append(language_str);
759 text.append(RBRACKET_CHARACTER);
760 text.append(SPACE_CHARACTER);
761 }
762 if(known_metadata != null) {
763 known_metadata.add(name_str);
764 }
765 language_str = null;
766 }
767 name_str = null;
768 // We don't wrap the email addresses in quotes, nor any string without spaces
769 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
770 text.append(value_str);
771 }
772 else {
773 text.append(QUOTE_CHARACTER);
774 text.append(value_str);
775 text.append(QUOTE_CHARACTER);
776 }
777 value_str = null;
778 return text.toString();
779 }
780 }
781
782 /** Parse a collect.cfg into a DOM model representation. */
783 private void parse(File collect_config_file) {
784 try {
785 Element collect_cfg_element = document.getDocumentElement();
786 // Read in the file command at a time.
787 FileReader in_reader = new FileReader(collect_config_file);
788 BufferedReader in = new BufferedReader(in_reader);
789 String command_str = null;
790 while((command_str = in.readLine()) != null) {
791 Element command_element = null;
792 // A command may be broken over several lines.
793 command_str = command_str.trim();
794 boolean eof = false;
795 while(!eof && command_str.endsWith(NEWLINE_CHARACTER)) {
796 String next_line = in.readLine();
797 if(next_line != null) {
798 next_line = next_line.trim();
799 if(next_line.length() > 0) {
800 // Remove the new line character
801 command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
802 // And append the next line, which due to the test above must be non-zero length
803 command_str = command_str + next_line;
804 }
805 next_line = null;
806 }
807 // If we've reached the end of the file theres nothing more we can do
808 else {
809 eof = true;
810 }
811 }
812 // If there is still a new line character, then we remove it and hope for the best
813 if(command_str.endsWith(NEWLINE_CHARACTER)) {
814 command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
815 }
816 // Now we've either got a command to parse...
817 if(command_str.length() != 0) {
818 // Start trying to figure out what it is
819 StringTokenizer tokenizer = new StringTokenizer(command_str);
820 String command_type = tokenizer.nextToken().toLowerCase();
821 tokenizer = null;
822 // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
823 if(command_element == null && command_type.equals(CLASSIFY_STR)) {
824 command_element = parseClassify(command_str);
825 }
826 if(command_element == null && command_type.equals(FORMAT_STR)) {
827 command_element = parseFormat(command_str);
828 }
829 if(command_element == null && (command_type.equals(INDEX_STR) || command_type.equals(COMMENTED_INDEXES_STR))) {
830 command_element = parseIndex(command_str);
831 }
832 if(command_element == null && (command_type.equals(INDEX_DEFAULT_STR) || command_type.equals(COMMENTED_INDEX_DEFAULT_STR))) {
833 command_element = parseIndexDefault(command_str);
834 }
835 if(command_element == null && command_type.equals(LANGUAGES_STR)) {
836 command_element = parseLanguage(command_str);
837 }
838 if(command_element == null && command_type.equals(LANGUAGE_DEFAULT_STR)) {
839 command_element = parseLanguageDefault(command_str);
840 }
841 if(command_element == null && (command_type.equals(LEVELS_STR) || command_type.equals(COMMENTED_LEVELS_STR))) {
842 command_element = parseLevels(command_str);
843 }
844 if(command_element == null && command_type.equals(COLLECTIONMETADATA_STR)) {
845 command_element = parseMetadata(command_str);
846 }
847 if(command_element == null && (command_type.equals(COLLECTIONMETADATA_BETA_STR) || command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) {
848 command_element = parseMetadataSpecial(command_str);
849 }
850 if(command_element == null && command_type.equals(PLUGIN_STR)) {
851 command_element = parsePlugIn(command_str);
852 }
853 if(command_element == null && command_type.equals(SEARCHTYPE_STR)) {
854 command_element = parseSearchType(command_str);
855 }
856 if(command_element == null && command_type.equals(SUBCOLLECTION_STR)) {
857 command_element = parseSubCollection(command_str);
858 }
859 if(command_element == null && command_type.equals(SUBCOLLECTION_DEFAULT_INDEX_STR)) {
860 command_element = parseSubCollectionDefaultIndex(command_str);
861 }
862 if(command_element == null && command_type.equals(SUBCOLLECTION_INDEX_STR)) {
863 command_element = parseSubCollectionIndex(command_str);
864 }
865 if(command_element == null && (command_type.equals(SUPERCOLLECTION_STR) || command_type.equals(CCS_STR))) {
866 command_element = parseSuperCollection(command_str);
867 }
868 // Doesn't match any known type
869 command_type = null;
870 if(command_element == null) {
871 // No-one knows what to do with this command, so we create an Unknown command element
872 command_element = document.createElement(UNKNOWN_ELEMENT);
873 MSMUtils.setValue(command_element, command_str);
874 }
875 }
876 // Or an empty line to remember for later
877 else {
878 command_element = document.createElement(NEWLINE_ELEMENT);
879 }
880 // Now command element shouldn't be null so we append it to the collection config DOM
881 collect_cfg_element.appendChild(command_element);
882 }
883 }
884 catch(Exception exception) {
885 Gatherer.println("Error in CollectionConfiguration.parse(java.io.File): " + exception);
886 Gatherer.printStackTrace(exception);
887 }
888 }
889
890 private Element parseClassify(String command_str) {
891 Element command_element = null;
892 try {
893 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
894 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
895 if(tokenizer.countTokens() >= 4) {
896 command_element = document.createElement(CLASSIFY_ELEMENT);
897 // First token is classify
898 tokenizer.nextToken();
899 // The next token is the classifier type
900 command_element.setAttribute(TYPE_ATTRIBUTE, tokenizer.nextToken());
901 // Now we parse out the remaining arguments into a hashmapping from name to value
902 HashMap arguments = parseArguments(tokenizer);
903 // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
904 Iterator names = arguments.keySet().iterator();
905 while(names.hasNext()) {
906 String name = (String) names.next();
907 String value = (String) arguments.get(name); // Can be null
908 // The metadata argument gets added as the content attribute
909 if(name.equals(METADATA_ARGUMENT) && value != null) {
910 // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace.
911 if(value.indexOf(MSMUtils.NS_SEP) == -1) {
912 value = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + value;
913 }
914 //command_element.setAttribute(CONTENT_ATTRIBUTE, value);
915 }
916 // Everything else is an Option Element
917 Element option_element = document.createElement(OPTION_ELEMENT);
918 option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
919 if(value != null) {
920 MSMUtils.setValue(option_element, value);
921 }
922 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
923 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
924 command_element.appendChild(option_element);
925 option_element = null;
926 name = null;
927 value = null;
928 }
929 names = null;
930 arguments = null;
931 }
932 tokenizer = null;
933 }
934 catch(Exception error) {
935 }
936 return command_element;
937 }
938
939 private Element parseFormat(String command_str) {
940 Element command_element = null;
941 try {
942 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
943 if(tokenizer.countTokens() >= 3) {
944 command_element = document.createElement(FORMAT_ELEMENT);
945 // First token is format
946 tokenizer.nextToken();
947 command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
948 String format_value = tokenizer.nextToken();
949 // If the value is true or false we add it as an attribute
950 if(format_value.equalsIgnoreCase(TRUE_STR) || format_value.equalsIgnoreCase(FALSE_STR)) {
951 command_element.setAttribute(VALUE_ATTRIBUTE, format_value.toLowerCase());
952 }
953 // Otherwise it gets added as a text node
954 else {
955 // Strip any speech marks
956 if(format_value.startsWith(QUOTE_CHARACTER) && format_value.endsWith(QUOTE_CHARACTER)) {
957 format_value = format_value.substring(1, format_value.length() - 1);
958 }
959 MSMUtils.setValue(command_element, format_value);
960 }
961 format_value = null;
962 }
963 tokenizer = null;
964 }
965 catch(Exception exception) {
966 }
967 return command_element;
968 }
969
970 private Element parseIndex(String command_str) {
971 Element command_element = null;
972 try {
973 StringTokenizer tokenizer = new StringTokenizer(command_str);
974 String command = tokenizer.nextToken();
975 if(tokenizer.hasMoreTokens()) {
976 command_element = document.createElement(INDEXES_ELEMENT);
977 }
978 command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(INDEX_STR) ? TRUE_STR : FALSE_STR));
979 command = null;
980 while(tokenizer.hasMoreTokens()) {
981 Element index_element = document.createElement(INDEX_ELEMENT);
982 String index_str = tokenizer.nextToken();
983 // There are two types of index we have to consider. Old G2.38 and earlier use level:source tuplets while G2.39+ have just a single, non-comma separated list where order is important.
984 boolean old_index;
985 if(index_str.indexOf(COLON_CHARACTER) != -1) {
986 old_index = true;
987 index_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
988 index_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
989 command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR);
990 }
991 else {
992 command_element.setAttribute(MGPP_ATTRIBUTE, TRUE_STR);
993 old_index = false;
994 }
995 StringTokenizer content_tokenizer = new StringTokenizer(index_str, StaticStrings.COMMA_CHARACTER);
996 while(content_tokenizer.hasMoreTokens()) {
997 Element content_element = document.createElement(CONTENT_ELEMENT);
998 String content_str = content_tokenizer.nextToken();
999 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1000 if(content_str.indexOf(MSMUtils.NS_SEP) == -1) {
1001 if(content_str.equals(StaticStrings.TEXT_STR) || (!old_index && content_str.equals(StaticStrings.ALLFIELDS_STR))) {
1002 // Our special strings are OK.
1003 }
1004 else {
1005 content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str;
1006 }
1007 }
1008 content_element.setAttribute(NAME_ATTRIBUTE, content_str);
1009 index_element.appendChild(content_element);
1010 content_element = null;
1011 }
1012 content_tokenizer = null;
1013 index_str = null;
1014 command_element.appendChild(index_element);
1015 index_element = null;
1016 }
1017 tokenizer = null;
1018 }
1019 catch (Exception exception) {
1020 exception.printStackTrace();
1021 }
1022 return command_element;
1023 }
1024
1025 private Element parseIndexDefault(String command_str) {
1026 Element command_element = null;
1027 try {
1028 StringTokenizer tokenizer = new StringTokenizer(command_str);
1029 if(tokenizer.countTokens() >= 2) {
1030 command_element = document.createElement(INDEX_DEFAULT_ELEMENT);
1031 command_element.setAttribute(ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken()).equals(INDEX_DEFAULT_STR) ? TRUE_STR : FALSE_STR));
1032 String index_str = tokenizer.nextToken();
1033 command_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
1034 String content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
1035 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1036 while(content_tokenizer.hasMoreTokens()) {
1037 Element content_element = document.createElement(CONTENT_ELEMENT);
1038 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1039 command_element.appendChild(content_element);
1040 content_element = null;
1041 }
1042 content_tokenizer = null;
1043 content_str = null;
1044 content_str = null;
1045 index_str = null;
1046 }
1047 tokenizer = null;
1048 }
1049 catch (Exception exception) {
1050 }
1051 return command_element;
1052 }
1053
1054 private Element parseLanguage(String command_str) {
1055 Element command_element = null;
1056 try {
1057 StringTokenizer tokenizer = new StringTokenizer(command_str);
1058 tokenizer.nextToken();
1059 if(tokenizer.hasMoreTokens()) {
1060 command_element = document.createElement(LANGUAGES_ELEMENT);
1061 while(tokenizer.hasMoreTokens()) {
1062 Element language_element = document.createElement(LANGUAGE_ELEMENT);
1063 language_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1064 command_element.appendChild(language_element);
1065 language_element = null;
1066 }
1067 }
1068 tokenizer = null;
1069 }
1070 catch (Exception exception) {
1071 }
1072 return command_element;
1073 }
1074
1075 private Element parseLanguageDefault(String command_str) {
1076 Element command_element = null;
1077 try {
1078 StringTokenizer tokenizer = new StringTokenizer(command_str);
1079 if(tokenizer.countTokens() >= 2) {
1080 command_element = document.createElement(LANGUAGE_DEFAULT_ELEMENT);
1081 tokenizer.nextToken();
1082 String default_language_str = tokenizer.nextToken();
1083 command_element.setAttribute(NAME_ATTRIBUTE, default_language_str);
1084 default_language_str = null;
1085 }
1086 tokenizer = null;
1087 }
1088 catch (Exception exception) {
1089 }
1090 return command_element;
1091 }
1092
1093 private Element parseLevels(String command_str) {
1094 Element command_element = null;
1095 try {
1096 StringTokenizer tokenizer = new StringTokenizer(command_str);
1097 // First token is command type (levels)
1098 String command = tokenizer.nextToken();
1099 if(tokenizer.hasMoreTokens()) {
1100 command_element = document.createElement(LEVELS_ELEMENT);
1101 command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(LEVELS_STR) ? TRUE_STR : FALSE_STR));
1102 while(tokenizer.hasMoreTokens()) {
1103 Element level_element = document.createElement(CONTENT_ELEMENT);
1104 level_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1105 command_element.appendChild(level_element);
1106 level_element = null;
1107 }
1108 }
1109 command = null;
1110 }
1111 catch(Exception exception) {
1112 }
1113 return command_element;
1114 }
1115
1116 private Element parseMetadata(String command_str) {
1117 Element command_element = null;
1118 try {
1119 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1120 if(tokenizer.countTokens() >= 3) {
1121 command_element = document.createElement(COLLECTIONMETADATA_ELEMENT);
1122 // First token is command type
1123 tokenizer.nextToken();
1124 String name_str = tokenizer.nextToken();
1125 String value_str = tokenizer.nextToken();
1126 String language_str = "en"; // By default
1127 // Check if the value string is actually a language string
1128 if(value_str.startsWith(LBRACKET_CHARACTER) && value_str.endsWith(RBRACKET_CHARACTER)) {
1129 language_str = value_str.substring(value_str.indexOf(LANGUAGE_ARGUMENT) + 2, value_str.length() - 1);
1130 value_str = tokenizer.nextToken();
1131 }
1132 // Remove any speech marks from value
1133 if(value_str.startsWith(QUOTE_CHARACTER) && value_str.endsWith(QUOTE_CHARACTER)) {
1134 value_str = value_str.substring(1, value_str.length() - 1);
1135 }
1136 if(value_str != null) {
1137 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1138 command_element.setAttribute(LANGUAGE_ATTRIBUTE, language_str);
1139 MSMUtils.setValue(command_element, Utility.encodeXML(value_str));
1140 }
1141 else {
1142 command_element = null;
1143 }
1144 language_str = null;
1145 value_str = null;
1146 name_str = null;
1147 }
1148 tokenizer = null;
1149 }
1150 catch (Exception exception) {
1151 }
1152 return command_element;
1153 }
1154
1155 private Element parseMetadataSpecial(String command_str) {
1156 Element command_element = null;
1157 try {
1158 StringTokenizer tokenizer = new StringTokenizer(command_str);
1159 if(tokenizer.countTokens() >= 2) {
1160 String name_str = tokenizer.nextToken();
1161 String value_str = tokenizer.nextToken();
1162 if(name_str.equals(COLLECTIONMETADATA_BETA_STR)) {
1163 command_element = document.createElement(COLLECTIONMETADATA_BETA_ELEMENT);
1164 }
1165 else if(name_str.equals(COLLECTIONMETADATA_CREATOR_STR)) {
1166 command_element = document.createElement(COLLECTIONMETADATA_CREATOR_ELEMENT);
1167 }
1168 else if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
1169 command_element = document.createElement(COLLECTIONMETADATA_MAINTAINER_ELEMENT);
1170 }
1171 else if(name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
1172 command_element = document.createElement(COLLECTIONMETADATA_PUBLIC_ELEMENT);
1173 }
1174 if(command_element != null) {
1175 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1176 command_element.setAttribute(LANGUAGE_ATTRIBUTE, ENGLISH_LANGUAGE_STR);
1177 command_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
1178 MSMUtils.setValue(command_element, value_str);
1179 }
1180 value_str = null;
1181 name_str = null;
1182 }
1183 tokenizer = null;
1184 }
1185 catch (Exception exception) {
1186 }
1187 return command_element;
1188 }
1189
1190 private Element parsePlugIn(String command_str) {
1191 Element command_element = null;
1192 boolean use_metadata_files = false;
1193 try {
1194 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1195 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1196 if(tokenizer.countTokens() >= 2) {
1197 command_element = document.createElement(PLUGIN_ELEMENT);
1198 // First token is plugin
1199 tokenizer.nextToken();
1200 // The next token is the type
1201 String type = tokenizer.nextToken();
1202 command_element.setAttribute(TYPE_ATTRIBUTE, type);
1203 // Now we parse out the remaining arguments into a hashmapping from name to value
1204 HashMap arguments = parseArguments(tokenizer);
1205 // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
1206 Iterator names = arguments.keySet().iterator();
1207 while(names.hasNext()) {
1208 String name = (String) names.next();
1209 String value = (String) arguments.get(name); // Can be null
1210 Element option_element = document.createElement(OPTION_ELEMENT);
1211 if(name.substring(1).equals(USE_METADATA_FILES_ARGUMENT)) {
1212 use_metadata_files = true;
1213 }
1214 option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
1215 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1216 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); // All arguments are considered to be custom until matched against base plugins arguments
1217 if(value != null) {
1218 if(name.equals(METADATA_ARGUMENT)) {
1219 // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace.
1220 if(value.indexOf(MSMUtils.NS_SEP) == -1) {
1221 value = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + value;
1222 }
1223 }
1224 MSMUtils.setValue(option_element, value);
1225 }
1226 command_element.appendChild(option_element);
1227 option_element = null;
1228 name = null;
1229 value = null;
1230 }
1231 // Finally if the type happened to be a RecPlug we ensure that the use metadata files argument is set
1232 if(type.equals(RECPLUG_STR) && !use_metadata_files) {
1233 Element option_element = document.createElement(OPTION_ELEMENT);
1234 option_element.setAttribute(NAME_ATTRIBUTE, USE_METADATA_FILES_ARGUMENT);
1235 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1236 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
1237 command_element.appendChild(option_element);
1238 option_element = null;
1239 }
1240 type = null;
1241 names = null;
1242 arguments = null;
1243 }
1244 tokenizer = null;
1245 }
1246 catch(Exception exception) {
1247 }
1248 return command_element;
1249 }
1250
1251 private Element parseSearchType(String command_str) {
1252 Element command_element = null;
1253 try {
1254 StringTokenizer tokenizer = new StringTokenizer(command_str);
1255 // First token is command type (levels)
1256 tokenizer.nextToken();
1257 if(tokenizer.hasMoreTokens()) {
1258 command_element = document.createElement(SEARCHTYPE_ELEMENT);
1259 command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1260 while(tokenizer.hasMoreTokens()) {
1261 Element search_element = document.createElement(CONTENT_ELEMENT);
1262 search_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1263 command_element.appendChild(search_element);
1264 search_element = null;
1265 }
1266 }
1267 }
1268 catch(Exception exception) {
1269 }
1270 return command_element;
1271 }
1272
1273 private Element parseSubCollection(String command_str) {
1274 Element command_element = null;
1275 try {
1276 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1277 if(tokenizer.countTokens() >= 3) {
1278 command_element = document.createElement(SUBCOLLECTION_ELEMENT);
1279 // First token is command type
1280 tokenizer.nextToken();
1281 // Then subcollection identifier
1282 command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1283 // Then finally the pattern used to build the subcollection partition
1284 String full_pattern_str = tokenizer.nextToken();
1285 // To make life easier I'm going to parse this up now.
1286 boolean exclusion = (full_pattern_str.substring(1, 2).equals(EXCLAIMATION_CHARACTER));
1287 // Set inclusion/exclusion flag, remove any exclaimation mark and the speech marks
1288 if(exclusion) {
1289 full_pattern_str = full_pattern_str.substring(2, full_pattern_str.length() - 1);
1290 command_element.setAttribute(TYPE_ATTRIBUTE, EXCLUDE_STR);
1291 }
1292 else {
1293 full_pattern_str = full_pattern_str.substring(1, full_pattern_str.length() - 1);
1294 command_element.setAttribute(TYPE_ATTRIBUTE, INCLUDE_STR);
1295 }
1296 StringTokenizer pattern_tokenizer = new StringTokenizer(full_pattern_str, SEPARATOR_CHARACTER);
1297 if(pattern_tokenizer.countTokens() >= 2) {
1298 String content_str = pattern_tokenizer.nextToken();
1299 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1300 if(!content_str.equals(StaticStrings.FILENAME_STR) && content_str.indexOf(MSMUtils.NS_SEP) == -1) {
1301 content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str;
1302 }
1303 command_element.setAttribute(CONTENT_ATTRIBUTE, content_str);
1304 MSMUtils.setValue(command_element, pattern_tokenizer.nextToken());
1305 if(pattern_tokenizer.hasMoreTokens()) {
1306 command_element.setAttribute(OPTIONS_ATTRIBUTE, pattern_tokenizer.nextToken());
1307 }
1308 }
1309 pattern_tokenizer = null;
1310 }
1311 }
1312 catch(Exception exception) {
1313 exception.printStackTrace();
1314 }
1315 return command_element;
1316 }
1317
1318 private Element parseSubCollectionDefaultIndex(String command_str) {
1319 Element command_element = null;
1320 try {
1321 StringTokenizer tokenizer = new StringTokenizer(command_str);
1322 if(tokenizer.countTokens() == 2) {
1323 command_element = document.createElement(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT);
1324 tokenizer.nextToken();
1325 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1326 String content_str = tokenizer.nextToken();
1327 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1328 while(content_tokenizer.hasMoreTokens()) {
1329 Element content_element = document.createElement(CONTENT_ELEMENT);
1330 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1331 command_element.appendChild(content_element);
1332 content_element = null;
1333 }
1334 content_tokenizer = null;
1335 content_str = null;
1336 }
1337 tokenizer = null;
1338 }
1339 catch(Exception exception) {
1340 }
1341 return command_element;
1342 }
1343
1344 private Element parseSubCollectionIndex(String command_str) {
1345 Element command_element = null;
1346 try {
1347 StringTokenizer tokenizer = new StringTokenizer(command_str);
1348 tokenizer.nextToken();
1349 if(tokenizer.hasMoreTokens()) {
1350 command_element = document.createElement(SUBCOLLECTION_INDEXES_ELEMENT);
1351 }
1352 while(tokenizer.hasMoreTokens()) {
1353 Element subcollectionindex_element = document.createElement(INDEX_ELEMENT);
1354 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1355 String content_str = tokenizer.nextToken();
1356 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1357 while(content_tokenizer.hasMoreTokens()) {
1358 Element content_element = document.createElement(CONTENT_ELEMENT);
1359 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1360 subcollectionindex_element.appendChild(content_element);
1361 content_element = null;
1362 }
1363 content_tokenizer = null;
1364 content_str = null;
1365 command_element.appendChild(subcollectionindex_element);
1366 subcollectionindex_element = null;
1367 }
1368 tokenizer = null;
1369 }
1370 catch (Exception exception) {
1371 }
1372 return command_element;
1373 }
1374
1375 private Element parseSuperCollection(String command_str) {
1376 Element command_element = null;
1377 try {
1378 StringTokenizer tokenizer = new StringTokenizer(command_str);
1379 if(tokenizer.countTokens() >= 3) {
1380 command_element = document.createElement(SUPERCOLLECTION_ELEMENT);
1381 tokenizer.nextToken();
1382 while(tokenizer.hasMoreTokens()) {
1383 Element collection_element = document.createElement(COLLECTION_ELEMENT);
1384 collection_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1385 command_element.appendChild(collection_element);
1386 collection_element = null;
1387 }
1388 }
1389 tokenizer = null;
1390 }
1391 catch(Exception exception) {
1392 }
1393 return command_element;
1394 }
1395
1396 private String pluginToString(Element command_element, boolean show_extracted_namespace) {
1397 StringBuffer text = new StringBuffer();
1398 if(!command_element.getAttribute(SEPARATOR_ATTRIBUTE).equals(TRUE_STR)) {
1399 text.append(PLUGIN_STR);
1400 text.append(TAB_CHARACTER);
1401 text.append(TAB_CHARACTER);
1402 text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
1403 // Retrieve, and output, the arguments
1404 NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
1405 int option_elements_length = option_elements.getLength();
1406 if(option_elements_length > 0) {
1407 text.append(SPACE_CHARACTER);
1408 for(int j = 0; j < option_elements_length; j++) {
1409 Element option_element = (Element) option_elements.item(j);
1410 if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1411 text.append(StaticStrings.MINUS_CHARACTER);
1412 text.append(option_element.getAttribute(NAME_ATTRIBUTE));
1413 String value_str = MSMUtils.getValue(option_element);
1414 if(!show_extracted_namespace && value_str.startsWith(EXTRACTED_PREFIX)) {
1415 value_str = value_str.substring(EXTRACTED_PREFIX.length());
1416 }
1417 if(value_str.length() > 0) {
1418 text.append(SPACE_CHARACTER);
1419 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
1420 text.append(value_str);
1421 }
1422 else {
1423 text.append(QUOTE_CHARACTER);
1424 text.append(value_str);
1425 text.append(QUOTE_CHARACTER);
1426 }
1427 }
1428 value_str = null;
1429 if(j < option_elements_length - 1) {
1430 text.append(SPACE_CHARACTER);
1431 }
1432 }
1433 option_element = null;
1434 }
1435 }
1436 option_elements = null;
1437 }
1438 return text.toString();
1439 }
1440
1441 private String searchtypeToString(Element command_element) {
1442 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1443 StringBuffer text = new StringBuffer(SEARCHTYPE_STR);
1444 text.append(TAB_CHARACTER);
1445 NodeList search_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1446 int search_elements_length = search_elements.getLength();
1447 for(int i = 0; i < search_elements_length; i++) {
1448 Element search_element = (Element) search_elements.item(i);
1449 text.append(search_element.getAttribute(NAME_ATTRIBUTE));
1450 text.append(SPACE_CHARACTER);
1451 }
1452 return text.substring(0, text.length() - 1);
1453 }
1454 else {
1455 return null;
1456 }
1457 }
1458
1459 private String subcollectionToString(Element command_element, boolean show_extracted_namespace) {
1460 StringBuffer text = new StringBuffer(SUBCOLLECTION_STR);
1461 text.append(SPACE_CHARACTER);
1462 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
1463 text.append(SPACE_CHARACTER);
1464 text.append(TAB_CHARACTER);
1465 text.append(QUOTE_CHARACTER);
1466 if(command_element.getAttribute(TYPE_ATTRIBUTE).equals(EXCLUDE_STR)) {
1467 text.append(EXCLAIMATION_CHARACTER);
1468 }
1469 String content_str = command_element.getAttribute(CONTENT_ATTRIBUTE);
1470 if(!show_extracted_namespace && content_str.startsWith(EXTRACTED_PREFIX)) {
1471 content_str = content_str.substring(EXTRACTED_PREFIX.length());
1472 }
1473 text.append(content_str);
1474 content_str = null;
1475 text.append(SEPARATOR_CHARACTER);
1476 text.append(MSMUtils.getValue(command_element));
1477 String options_str = command_element.getAttribute(OPTIONS_ATTRIBUTE);
1478 if(options_str.length() > 0) {
1479 text.append(SEPARATOR_CHARACTER);
1480 text.append(options_str);
1481 }
1482 options_str = null;
1483 text.append(QUOTE_CHARACTER);
1484 return text.toString();
1485 }
1486
1487 private String subcollectionDefaultIndexToString(Element command_element) {
1488 StringBuffer text = new StringBuffer(SUBCOLLECTION_DEFAULT_INDEX_STR);
1489 text.append(TAB_CHARACTER);
1490 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1491 int content_elements_length = content_elements.getLength();
1492 for(int j = 0; j < content_elements_length; j++) {
1493 Element content_element = (Element) content_elements.item(j);
1494 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1495 if(j < content_elements_length - 1) {
1496 text.append(StaticStrings.COMMA_CHARACTER);
1497 }
1498 }
1499 return text.toString();
1500 }
1501
1502 private String subcollectionIndexesToString(Element command_element) {
1503 StringBuffer text = new StringBuffer(SUBCOLLECTION_INDEX_STR);
1504 text.append(TAB_CHARACTER);
1505 // Retrieve all of the subcollection index partitions
1506 NodeList subcollectionindex_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
1507 int subcollectionindex_elements_length = subcollectionindex_elements.getLength();
1508 if(subcollectionindex_elements_length == 0) {
1509 return null;
1510 }
1511 for(int j = 0; j < subcollectionindex_elements_length; j++) {
1512 Element subcollectionindex_element = (Element) subcollectionindex_elements.item(j);
1513 NodeList content_elements = subcollectionindex_element.getElementsByTagName(CONTENT_ELEMENT);
1514 int content_elements_length = content_elements.getLength();
1515 for(int k = 0; k < content_elements_length; k++) {
1516 Element content_element = (Element) content_elements.item(k);
1517 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1518 if(k < content_elements_length - 1) {
1519 text.append(StaticStrings.COMMA_CHARACTER);
1520 }
1521 }
1522 if(j < subcollectionindex_elements_length - 1) {
1523 text.append(SPACE_CHARACTER);
1524 }
1525 }
1526 return text.toString();
1527 }
1528
1529 private String supercollectionToString(Element command_element) {
1530 NodeList content_elements = command_element.getElementsByTagName(COLLECTION_ELEMENT);
1531 int content_elements_length = content_elements.getLength();
1532 if(content_elements_length > 1) {
1533 StringBuffer text = new StringBuffer(SUPERCOLLECTION_STR);
1534 text.append(TAB_CHARACTER);
1535 for(int j = 0; j < content_elements_length; j++) {
1536 Element content_element = (Element) content_elements.item(j);
1537 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1538 if(j < content_elements_length - 1) {
1539 text.append(SPACE_CHARACTER);
1540 }
1541 }
1542 return text.toString();
1543 }
1544 return null;
1545 }
1546
1547 private String unknownToString(Element command_element) {
1548 return MSMUtils.getValue(command_element);
1549 }
1550
1551 /** Write the text to the buffer. This is used so we don't have to worry about storing intermediate String values just so we can calaulate length and offset.
1552 * @param writer the BufferedWriter to which the str will be written
1553 * @param str the String to be written
1554 */
1555 private void write(BufferedWriter writer, String str)
1556 throws IOException {
1557 writer.write(str, 0, str.length());
1558 }
1559}
Note: See TracBrowser for help on using the repository browser.