source: trunk/gli/src/org/greenstone/gatherer/cdm/CollectionConfiguration.java@ 8853

Last change on this file since 8853 was 8652, checked in by mdewsnip, 20 years ago

Removed more dead code, and moved most of the XML stuff out of Utility and into XMLTools.

  • Property svn:keywords set to Author Date Id Revision
File size: 78.2 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.cdm;
28
29import java.awt.*;
30import java.awt.event.*;
31import java.io.*;
32import java.util.*;
33import javax.swing.*;
34import org.greenstone.gatherer.Configuration;
35import org.greenstone.gatherer.DebugStream;
36import org.greenstone.gatherer.Gatherer;
37import org.greenstone.gatherer.gui.GLIButton;
38import org.greenstone.gatherer.metadata.MetadataElement;
39import org.greenstone.gatherer.metadata.MetadataTools;
40import org.greenstone.gatherer.util.DOMTree;
41import org.greenstone.gatherer.util.Codec;
42import org.greenstone.gatherer.util.StaticStrings;
43import org.greenstone.gatherer.util.Utility;
44import org.greenstone.gatherer.util.XMLTools;
45import org.w3c.dom.*;
46
47/** This class provides either access to a pseudo-G3 document, or parses a collect.cfg file in such a way as to provide an xml-type view of its content. This latter approach is useful as it allows the manipulation and free-form editing of a legacy collect.cfg file while still allowing the various CDM data managers to base themselves directly on this model (whereas they used to be independent ListModels which clobbered the ordering of unparsed commands).
48 * @author John Thompson, Greenstone Digital Library, University of Waikato
49 * @version 2.3d
50 */
51public class CollectionConfiguration
52 extends StaticStrings {
53
54 static final public String ENCODING = "UTF-8";
55
56 static public Document document;
57
58 static public void main(String[] args) {
59 if(args.length >= 1) {
60 File file = new File(args[0]);
61 CollectionConfiguration collect_cfg = new CollectionConfiguration(file);
62 collect_cfg.save(true);
63 collect_cfg.save(false);
64 collect_cfg = null;
65 }
66 else {
67 System.out.println("Usage: CollectionConfiguration <filename>");
68 }
69 }
70
71 /** Find the best insertion position for the given DOM Element. This should try to match command tag, and if found should then try to group by name or type (eg CollectionMeta), or append to end is no such grouping exists (eg PlugIns). Failing a command match it will check against the command order for the best insertion location.
72 * @param target_element the command Element to be inserted
73 * @return the Element which the given command should be inserted before, or null to append to end of list
74 */
75 static public Node findInsertionPoint(Element target_element) {
76 ///ystem.err.println("Find insertion point: " + target_element.getNodeName());
77 String target_element_name = target_element.getNodeName();
78 Element document_element = document.getDocumentElement();
79 // Try to find commands with the same tag.
80 NodeList matching_elements = document_element.getElementsByTagName(target_element_name);
81 // If we found matching elements, then we have our most likely insertion location, so check within for groupings
82 if(matching_elements.getLength() != 0) {
83 ///ystem.err.println("Found matching elements.");
84 // Only CollectionMeta are grouped.
85 if(target_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
86 ///ystem.err.println("Dealing with collection metadata");
87 // Special case: CollectionMeta can be added at either the start or end of a collection configuration file. However the start position is reserved for special metadata, so if no non-special metadata can be found we must append to the end.
88 // So if the command to be added is special add it immediately after any other special command
89 if(target_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
90 int index = 0;
91 Element matched_element = (Element) matching_elements.item(index);
92 Element sibling_element = (Element) matched_element.getNextSibling();
93 while(sibling_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
94 index++;
95 matched_element = (Element) matching_elements.item(index);
96 sibling_element = (Element) matched_element.getNextSibling();
97 }
98 if(sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
99 Element newline_element = document.createElement(NEWLINE_ELEMENT);
100 document_element.insertBefore(newline_element, sibling_element);
101 }
102 return sibling_element;
103 }
104 // Otherwise try to find a matching 'name' and add after the last one in that group.
105 else {
106 int index = 0;
107 target_element_name = target_element.getAttribute(NAME_ATTRIBUTE);
108 boolean found = false;
109 // Skip all of the special metadata
110 Element matched_element = (Element) matching_elements.item(index);
111 while(matched_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
112 index++;
113 matched_element = (Element) matching_elements.item(index);
114 }
115 // Begin search
116 while(!found && matched_element != null) {
117 if(matched_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
118 found = true;
119 }
120 else {
121 index++;
122 matched_element = (Element) matching_elements.item(index);
123 }
124 }
125 // If we found a match, we need to continue checking until we find the last name match.
126 if(found) {
127 index++;
128 Element previous_sibling = matched_element;
129 Element sibling_element = (Element) matching_elements.item(index);
130 while(sibling_element != null && sibling_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
131 previous_sibling = sibling_element;
132 index++;
133 sibling_element = (Element) matching_elements.item(index);
134 }
135 // Previous sibling now holds the command immediately before where we want to add, so find its next sibling and add to that. In this one case we can ignore new lines!
136 return previous_sibling.getNextSibling();
137 }
138 // If not found we just add after last metadata element
139 else {
140 Element last_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
141 return last_element.getNextSibling();
142 }
143 }
144
145 }
146 else {
147 ///ystem.err.println("Not dealing with collection meta.");
148 Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
149 // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
150 Node sibling_element = matched_element.getNextSibling();
151 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
152 Element newline_element = document.createElement(NEWLINE_ELEMENT);
153 document_element.insertBefore(newline_element, sibling_element);
154 }
155 return sibling_element; // Note that this may be null
156 }
157 }
158 ///ystem.err.println("No matching elements found.");
159 // Locate where this command is in the ordering
160 int command_index = -1;
161 for(int i = 0; command_index == -1 && i < COMMAND_ORDER.length; i++) {
162 if(COMMAND_ORDER[i].equals(target_element_name)) {
163 command_index = i;
164 }
165 }
166 ///ystem.err.println("Command index is: " + command_index);
167 // Now move forward, checking for existing elements in each of the preceeding command orders.
168 int preceeding_index = command_index - 1;
169 ///ystem.err.println("Searching before the target command.");
170 while(preceeding_index >= 0) {
171 matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[preceeding_index]);
172 // If we've found a match
173 if(matching_elements.getLength() > 0) {
174 // We add after the last element
175 Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
176 // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
177 Node sibling_element = matched_element.getNextSibling();
178 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
179 Element newline_element = document.createElement(NEWLINE_ELEMENT);
180 document_element.insertBefore(newline_element, sibling_element);
181 }
182 return sibling_element; // Note that this may be null
183 }
184 preceeding_index--;
185 }
186 // If all that fails, we now move backwards through the commands
187 int susceeding_index = command_index + 1;
188 ///ystem.err.println("Searching after the target command.");
189 while(susceeding_index < COMMAND_ORDER.length) {
190 matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[susceeding_index]);
191 // If we've found a match
192 if(matching_elements.getLength() > 0) {
193 // We add before the first element
194 Element matched_element = (Element) matching_elements.item(0);
195 // One final quick test. If the matched element is immediately preceeded by a NewLine command, then we insert another NewLine before the matched command, then return this new NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
196 Node sibling_element = matched_element.getPreviousSibling();
197 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
198 Element newline_element = document.createElement(NEWLINE_ELEMENT);
199 document_element.insertBefore(newline_element, sibling_element);
200 }
201 return sibling_element; // Note that this may be null
202 }
203 susceeding_index++;
204 }
205 // Well. Apparently there are no other commands in this collection configuration. So append away...
206 return null;
207 }
208
209 static public String toString(Element command_element, boolean show_extracted_namespace) {
210 String command_element_name = command_element.getNodeName();
211 if(command_element_name.equals(CLASSIFY_ELEMENT)) {
212 return self.classifyToString(command_element, show_extracted_namespace);
213 }
214 else if(command_element_name.equals(FORMAT_ELEMENT)) {
215 return self.formatToString(command_element, show_extracted_namespace);
216 }
217 else if(command_element_name.equals(INDEXES_ELEMENT)) {
218 return self.indexesToString(command_element, show_extracted_namespace);
219 }
220 else if(command_element_name.equals(INDEX_DEFAULT_ELEMENT)) {
221 return self.indexDefaultToString(command_element, show_extracted_namespace);
222 }
223 else if(command_element_name.equals(LANGUAGES_ELEMENT)) {
224 return self.languagesToString(command_element);
225 }
226 else if(command_element_name.equals(LANGUAGE_DEFAULT_ELEMENT)) {
227 return self.languageDefaultToString(command_element);
228 }
229 else if(command_element_name.equals(LEVELS_ELEMENT)) {
230 return self.levelsToString(command_element);
231 }
232 else if(command_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
233 return self.metadataToString(command_element, show_extracted_namespace);
234 }
235 else if(command_element_name.equals(COLLECTIONMETADATA_CREATOR_ELEMENT)) {
236 return self.metadataToString(command_element, show_extracted_namespace);
237 }
238 else if(command_element_name.equals(COLLECTIONMETADATA_MAINTAINER_ELEMENT)) {
239 return self.metadataToString(command_element, show_extracted_namespace);
240 }
241 else if(command_element_name.equals(COLLECTIONMETADATA_PUBLIC_ELEMENT)) {
242 return self.metadataToString(command_element, show_extracted_namespace);
243 }
244 // else if(command_element_name.equals(COLLECTIONMETADATA_BETA_ELEMENT)) {
245 // return self.metadataToString(command_element, show_extracted_namespace);
246 // }
247 else if(command_element_name.equals(PLUGIN_ELEMENT)) {
248 return self.pluginToString(command_element, show_extracted_namespace);
249 }
250 else if(command_element_name.equals(SEARCHTYPE_ELEMENT)) {
251 return self.searchtypeToString(command_element);
252 }
253 else if(command_element_name.equals(SUBCOLLECTION_ELEMENT)) {
254 return self.subcollectionToString(command_element, show_extracted_namespace);
255 }
256 else if(command_element_name.equals(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) {
257 return self.subcollectionDefaultIndexToString(command_element);
258 }
259 else if(command_element_name.equals(SUBCOLLECTION_INDEXES_ELEMENT)) {
260 return self.subcollectionIndexesToString(command_element);
261 }
262 else if(command_element_name.equals(SUPERCOLLECTION_ELEMENT)) {
263 return self.supercollectionToString(command_element);
264 }
265 else if(command_element_name.equals(UNKNOWN_ELEMENT)) {
266 return self.unknownToString(command_element);
267 }
268 return "";
269 }
270
271 /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix.
272 * @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string
273 * @return a HashMap containing the arguments parsed
274 */
275 static public HashMap parseArguments(CommandTokenizer tokenizer) {
276 HashMap arguments = new HashMap();
277 String name = null;
278 String value = null;
279 while(tokenizer.hasMoreTokens() || name != null) {
280 // First we retrieve a name if we need one.
281 if(name == null) {
282 name = tokenizer.nextToken();
283 }
284 // Now we attempt to retrieve a value
285 if(tokenizer.hasMoreTokens()) {
286 value = tokenizer.nextToken();
287 // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop.
288 if(value.startsWith(StaticStrings.MINUS_CHARACTER)) {
289 arguments.put(name, null);
290 name = value;
291 }
292 // Otherwise we have a typical name->value pair ready to go
293 else {
294 arguments.put(name, value);
295 name = null;
296 }
297 }
298 // Otherwise its a binary flag
299 else {
300 arguments.put(name, null);
301 name = null;
302 }
303 }
304 return arguments;
305 }
306
307 static private ArrayList known_metadata;
308
309 static private CollectionConfiguration self;
310
311 /** Gives the preferred ordering of commands */
312 static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, /* StaticStrings.COLLECTIONMETADATA_BETA_ELEMENT, */ StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};
313
314 /** ************************** Public Data Members ***************************/
315
316 /** ************************** Private Data Members ***************************/
317
318 /** Is the configuration file currently being read in one of the infamous G2.39 ones. */
319 private boolean is_twopointthreenine = false;
320 private File collect_config_file;
321
322 /** ************************** Public Methods ***************************/
323
324 public CollectionConfiguration(File collect_config_file) {
325 this.self = this;
326 this.collect_config_file = collect_config_file;
327 // If collect_cfg is xml we can load it straight away
328 String collect_config_name = collect_config_file.getName();
329 if(collect_config_name.equals(COLLECTCONFIGURATION_XML)) {
330 // Parse with Utility but don't use class loader
331 document = XMLTools.parse(collect_config_file, false);
332 }
333 // Otherwise if this is a legacy collect.cfg file then read in the template and send to magic parser
334 else if(collect_config_name.equals(COLLECT_CFG)) {
335 document = Utility.parse(PSEUDO_COLLECTCONFIGURATION_XML, true);
336 parse(collect_config_file);
337 }
338 }
339
340 /** This debug facility shows the currently loaded collect.cfg or CollectConfig.xml file as a DOM tree. */
341 public void display() {
342 JDialog dialog = new JDialog(Gatherer.g_man, "Collection Configuration", false);
343 dialog.setSize(400,400);
344 JPanel content_pane = (JPanel) dialog.getContentPane();
345 final DOMTree tree = new DOMTree(document);
346 JButton refresh_button = new GLIButton("Refresh Tree");
347 refresh_button.setMnemonic(KeyEvent.VK_R);
348 refresh_button.addActionListener(new ActionListener() {
349 public void actionPerformed(ActionEvent event) {
350 tree.setDocument(document);
351 }
352 });
353 content_pane.setBorder(BorderFactory.createEmptyBorder(5,5,5,5));
354 content_pane.setLayout(new BorderLayout());
355 content_pane.add(new JScrollPane(tree), BorderLayout.CENTER);
356 content_pane.add(refresh_button, BorderLayout.SOUTH);
357 dialog.setVisible(true);
358 }
359
360// public Element getBeta() {
361// Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_BETA_ELEMENT, null, null);
362// element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_BETA_STR);
363// element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
364// return element;
365// }
366
367 public Element getCreator() {
368 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_CREATOR_ELEMENT, null, null);
369 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_CREATOR_STR);
370 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
371 return element;
372 }
373
374 public Element getDocumentElement() {
375 return document.getDocumentElement();
376 }
377
378 public File getFile() {
379 return collect_config_file;
380 }
381
382 /** Retrieve or create the languages Element. */
383 public Element getLanguages() {
384 return getOrCreateElementByTagName(LANGUAGES_ELEMENT, null, null);
385 }
386
387 public Element getLevels() {
388 return getOrCreateElementByTagName(LEVELS_ELEMENT, null, null);
389 }
390
391 public Element getMaintainer() {
392 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_MAINTAINER_ELEMENT, null, null);
393 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_MAINTAINER_STR);
394 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
395 return element;
396 }
397
398 /** Retrieve or create the indexes Element. Note that this method behaves differently from the other getBlah methods, in that it also has to keep in mind that indexes come in two flavours, MG and MGPP. */
399 public Element getMGIndexes() {
400 return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, FALSE_STR);
401 }
402
403 public Element getMGPPIndexes() {
404 return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, TRUE_STR);
405 }
406
407 public Element getPublic() {
408 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_PUBLIC_ELEMENT, null, null);
409 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_PUBLIC_STR);
410 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
411 return element;
412 }
413
414 /** Retrieve or create the searchtype element. */
415 public Element getSearchType() {
416 ///ystem.err.println("Get or create element by tag name: " + name);
417 Element document_element = document.getDocumentElement();
418 NodeList elements = document_element.getElementsByTagName(SEARCHTYPE_ELEMENT);
419 int elements_length = elements.getLength();
420 if(elements_length > 0) {
421 document_element = null;
422 return (Element) elements.item(0);
423 }
424 // Create the element
425 Element element = document.createElement(SEARCHTYPE_ELEMENT);
426 Node target_node = findInsertionPoint(element);
427 if(target_node != null) {
428 document_element.insertBefore(element, target_node);
429 }
430 else {
431 document_element.appendChild(element);
432 }
433 document_element = null;
434 // Append a default search type node - form
435 Element a_searchtype_element = CollectionDesignManager.collect_config.document.createElement(CollectionConfiguration.CONTENT_ELEMENT);
436 a_searchtype_element.setAttribute(CollectionConfiguration.NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]);
437 element.appendChild(a_searchtype_element);
438 return element;
439 }
440
441 /** Retrieve or create the subindexes Element. */
442 public Element getSubIndexes() {
443 return getOrCreateElementByTagName(SUBCOLLECTION_INDEXES_ELEMENT, null, null);
444 }
445
446 /** Retrieve or create the supercollections Element. */
447 public Element getSuperCollection() {
448 return getOrCreateElementByTagName(SUPERCOLLECTION_ELEMENT, null, null);
449 }
450
451 public boolean ready() {
452 return document != null;
453 }
454
455 public void save() {
456 save(false);
457 }
458
459 public void save(boolean force_xml) {
460 if(collect_config_file.exists()) {
461 File original_file = new File(collect_config_file.getParentFile(), COLLECT_CFG);
462 File backup_file = new File(collect_config_file.getParentFile(), "collect.bak");
463 if(backup_file.exists()) {
464 backup_file.delete();
465 }
466 if(!original_file.renameTo(backup_file)) {
467 DebugStream.println("Can't rename collect.cfg");
468 }
469 }
470 if(force_xml || collect_config_file.getName().equals(COLLECTCONFIGURATION_XML)) {
471 ///ystem.err.println("Writing XML");
472 XMLTools.export(document, new File(collect_config_file.getParentFile(), COLLECTCONFIGURATION_XML));
473 }
474 else {
475 ///ystem.err.println("Writing text");
476 try {
477 OutputStream ostream = new FileOutputStream(collect_config_file);
478 Writer file_writer = new OutputStreamWriter(ostream, ENCODING);
479 //FileWriter file_writer = new FileWriter(collect_config_file, false);
480 BufferedWriter buffered_writer = new BufferedWriter(file_writer);
481 // In order to write out an old style collect.cfg we have to traverse the model and do several 'cute' tricks to ensure the collect.cfg is valid (for instance while every metadata element has a language attribute, only second or subsequent metadata, for a certain name, needs a language argument - hence the known metadata array. Note that within GLI the language will always be shown, but it doesn't crash and burn like G2 does, te-he).
482 // is this still true?? now we are writing all metadata with a lang tag. can we get rid of known_metadata??
483 // Yes we can. Lets see..
484 // known_metadata = new ArrayList(); 'blamo'
485 Element collect_config_element = document.getDocumentElement();
486 NodeList command_elements = collect_config_element.getChildNodes();
487 boolean just_wrote_newline = false; // Prevent two or more newlines in a row
488 for(int i = 0; i < command_elements.getLength(); i++) {
489 Node command_node = command_elements.item(i);
490 if(command_node instanceof Element) {
491 Element command_element = (Element) command_node;
492 // The only thing left are NewLine elements
493 if(command_element.getNodeName().equals(NEWLINE_ELEMENT) && !just_wrote_newline) {
494 buffered_writer.newLine();
495 just_wrote_newline = true;
496 }
497 // Anything else we write to file, but only if it has been assigned, the exception being the Indexes element which just get commented if unassigned (a side effect of MG && MGPP compatibility)
498 else if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR) || command_element.getNodeName().equals(INDEXES_ELEMENT) || command_element.getNodeName().equals(INDEX_DEFAULT_ELEMENT) || command_element.getNodeName().equals(LEVELS_ELEMENT)){
499 String command;
500 // format statements we write out with ex. still present
501 if (command_element.getNodeName().equals(FORMAT_ELEMENT)) {
502 command = toString(command_element, true);
503 } else {
504 command = toString(command_element, false);
505 }
506 if(command != null && command.length() > 0) {
507 write(buffered_writer, command);
508 buffered_writer.newLine();
509 just_wrote_newline = false;
510 }
511 }
512 }
513 }
514 buffered_writer.close();
515 // known_metadata = null; 'poof'
516 }
517 catch (Exception exception) {
518 DebugStream.println("Error in CollectionConfiguration.save(boolean): " + exception);
519 DebugStream.printStackTrace(exception);
520 }
521 }
522 }
523
524 /** ************************** Private Methods ***************************/
525
526 private String classifyToString(Element command_element, boolean show_extracted_namespace)
527 {
528 StringBuffer text = new StringBuffer(CLASSIFY_STR);
529 text.append(TAB_CHARACTER);
530 text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
531 text.append(SPACE_CHARACTER);
532 NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
533 int option_elements_length = option_elements.getLength();
534 for(int j = 0; j < option_elements_length; j++) {
535 Element option_element = (Element) option_elements.item(j);
536 if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
537 text.append(StaticStrings.MINUS_CHARACTER);
538 text.append(option_element.getAttribute(NAME_ATTRIBUTE));
539 String value_str = XMLTools.getValue(option_element);
540
541 // Remove the extracted metadata namespaces if required
542 if (value_str.length() > 0) {
543 StringTokenizer string_tokenizer = new StringTokenizer(value_str, ",");
544 value_str = "";
545 while (string_tokenizer.hasMoreElements()) {
546 String raw_token = (String) string_tokenizer.nextElement();
547 String token = raw_token.trim();
548 MetadataElement metadata_element = MetadataTools.getMetadataElementWithDisplayName(token);
549 if (metadata_element != null) {
550 token = metadata_element.getFullName();
551 }
552
553 if (token.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
554 token = token.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
555 }
556
557 value_str = value_str + token;
558 if (string_tokenizer.hasMoreElements()) {
559 value_str = value_str + ",";
560 }
561 }
562 }
563
564 text.append(SPACE_CHARACTER);
565 if (value_str.indexOf(SPACE_CHARACTER) == -1) {
566 text.append(value_str);
567 }
568 else {
569 text.append(SPEECH_CHARACTER);
570 text.append(value_str);
571 text.append(SPEECH_CHARACTER);
572 }
573 value_str = null;
574 if(j < option_elements_length - 1) {
575 text.append(SPACE_CHARACTER);
576 }
577 }
578 option_element = null;
579 }
580 option_elements = null;
581 return text.toString();
582 }
583
584 private String formatToString(Element command_element, boolean show_extracted_namespace) {
585 StringBuffer text = new StringBuffer(FORMAT_STR);
586 text.append(SPACE_CHARACTER);
587 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
588 text.append(SPACE_CHARACTER);
589 String value_str = command_element.getAttribute(VALUE_ATTRIBUTE);
590 if(value_str.length() != 0) {
591 text.append(value_str);
592 }
593 else {
594 // Remember to encode format string to Greenstone specification
595 value_str = Codec.transform(XMLTools.getValue(command_element), Codec.DOM_TO_GREENSTONE);
596 // Remove any references to a namespace for extracted metadata
597 if (!show_extracted_namespace) {
598 String match_string = "\\[" + Utility.EXTRACTED_METADATA_NAMESPACE + "\\.";
599 value_str = value_str.replaceAll(match_string, "[");
600 }
601
602 text.append(SPEECH_CHARACTER);
603 text.append(value_str);
604 text.append(SPEECH_CHARACTER);
605 }
606 value_str = null;
607 return text.toString();
608 }
609
610 /** Retrieve or create the indexes Element. */
611 private Element getOrCreateElementByTagName(String name, String conditional_attribute, String required_value) {
612 Element document_element = document.getDocumentElement();
613 NodeList elements = document_element.getElementsByTagName(name);
614 int elements_length = elements.getLength();
615 if(elements_length > 0) {
616 if(conditional_attribute == null) {
617 document_element = null;
618 return (Element) elements.item(0);
619 }
620 else {
621 for(int i = 0; i < elements_length; i++) {
622 Element element = (Element) elements.item(i);
623 if(element.getAttribute(conditional_attribute).equals(required_value)) {
624 document_element = null;
625 return element;
626 }
627 element = null;
628 }
629 }
630 }
631 // Create the element
632 Element element = document.createElement(name);
633 // If there was a property set it
634 if(conditional_attribute != null) {
635 element.setAttribute(conditional_attribute, required_value);
636 }
637 Node target_node = findInsertionPoint(element);
638 if(target_node != null) {
639 document_element.insertBefore(element, target_node);
640 }
641 else {
642 document_element.appendChild(element);
643 }
644 document_element = null;
645 return element;
646 }
647
648 private String indexesToString(Element command_element, boolean show_extracted_namespace) {
649 boolean comment_only = false;
650 StringBuffer text = new StringBuffer("");
651 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
652 text.append("#");
653 comment_only = true;
654 }
655 text.append(INDEX_STR);
656 text.append(TAB_CHARACTER);
657 if(!comment_only) {
658 text.append(TAB_CHARACTER);
659 }
660 NodeList index_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
661 if (index_elements.getLength() == 0) { // no indexes
662 return "";
663 }
664 // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
665 int index_elements_length = index_elements.getLength();
666 for(int j = 0; j < index_elements_length; j++) {
667 Element index_element = (Element) index_elements.item(j);
668 String level_str = index_element.getAttribute(LEVEL_ATTRIBUTE);
669 if(level_str.length() > 0) {
670 text.append(level_str);
671 text.append(StaticStrings.COLON_CHARACTER);
672 }
673 NodeList content_elements = index_element.getElementsByTagName(CONTENT_ELEMENT);
674 int content_elements_length = content_elements.getLength();
675 // Don't output anything if no indexes are set
676 if(content_elements_length == 0) {
677 return null;
678 }
679 for(int k = 0; k < content_elements_length; k++) {
680 Element content_element = (Element) content_elements.item(k);
681 String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
682 if(!show_extracted_namespace && name_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
683 name_str = name_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
684 }
685 text.append(name_str);
686 name_str = null;
687 if(k < content_elements_length - 1) {
688 text.append(StaticStrings.COMMA_CHARACTER);
689 }
690 content_element = null;
691 }
692 if(j < index_elements_length - 1) {
693 text.append(SPACE_CHARACTER);
694 }
695 content_elements = null;
696 index_element = null;
697 }
698 index_elements = null;
699 return text.toString();
700 }
701
702 private String indexDefaultToString(Element command_element, boolean show_extracted_namespace) {
703 StringBuffer text = new StringBuffer("");
704 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
705 text.append("#");
706 }
707 text.append(INDEX_DEFAULT_STR);
708 text.append(TAB_CHARACTER);
709 text.append(command_element.getAttribute(LEVEL_ATTRIBUTE));
710 text.append(StaticStrings.COLON_CHARACTER);
711 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
712 int content_elements_length = content_elements.getLength();
713 for(int j = 0; j < content_elements_length; j++) {
714 Element content_element = (Element) content_elements.item(j);
715 String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
716 if(!show_extracted_namespace && name_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
717 name_str = name_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
718 }
719 text.append(name_str);
720 name_str = null;
721 if(j < content_elements_length - 1) {
722 text.append(StaticStrings.COMMA_CHARACTER);
723 }
724 content_element = null;
725 }
726 content_elements = null;
727 return text.toString();
728 }
729
730 private String languagesToString(Element command_element) {
731 StringBuffer text = new StringBuffer(LANGUAGES_STR);
732 text.append(TAB_CHARACTER);
733 // Retrieve all the languages and write them out in a space separated list
734 NodeList language_elements = command_element.getElementsByTagName(LANGUAGE_ELEMENT);
735 int language_elements_length = language_elements.getLength();
736 if(language_elements_length == 0) {
737 return null;
738 }
739 for(int j = 0; j < language_elements_length; j++) {
740 Element language_element = (Element) language_elements.item(j);
741 text.append(language_element.getAttribute(NAME_ATTRIBUTE));
742 if(j < language_elements_length - 1) {
743 text.append(SPACE_CHARACTER);
744 }
745 }
746 return text.toString();
747 }
748
749 private String languageDefaultToString(Element command_element) {
750 StringBuffer text = new StringBuffer(LANGUAGE_DEFAULT_STR);
751 text.append(TAB_CHARACTER);
752 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
753 return text.toString();
754 }
755
756 private String levelsToString(Element command_element) {
757 StringBuffer text = new StringBuffer("");
758 if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
759 text.append("#");
760 }
761 text.append(LEVELS_STR);
762 text.append(TAB_CHARACTER);
763 text.append(TAB_CHARACTER);
764 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
765 int content_elements_length = content_elements.getLength();
766 // Don't output anything if no levels are set.
767 if(content_elements_length == 0) {
768 return null;
769 }
770 for(int i = 0; i < content_elements_length; i++) {
771 Element content_element = (Element) content_elements.item(i);
772 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
773 text.append(SPACE_CHARACTER);
774 }
775 return text.substring(0, text.length() - 1);
776 }
777
778 static public String metadataToString(Element command_element, boolean text_value) {
779 boolean special = false;
780 // If there is no value attribute, then we don't write anything
781 StringBuffer text = new StringBuffer("");
782 String name_str = command_element.getAttribute(NAME_ATTRIBUTE);
783 // If the name is one of the special four, we don't write the collectionmeta first. Note the maintainer collectionmeta is singled out for 'prittying' reasons.
784 if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
785 text.append(name_str);
786 text.append(TAB_CHARACTER);
787 special = true;
788 }
789 else if (/* name_str.equals(COLLECTIONMETADATA_BETA_STR) || */ name_str.equals(COLLECTIONMETADATA_CREATOR_STR) || name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
790 text.append(name_str);
791 text.append(TAB_CHARACTER);
792 text.append(TAB_CHARACTER);
793 special = true;
794 }
795 else {
796 text.append(COLLECTIONMETADATA_STR);
797 text.append(TAB_CHARACTER);
798 text.append(name_str);
799 text.append(SPACE_CHARACTER);
800 String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE);
801 // If this is element is in english, and it is the first one found, we don't need to write the language argument.
802 //if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) {
803 // changed so that we always write the language string
804 text.append(LBRACKET_CHARACTER);
805 text.append(LANGUAGE_ARGUMENT);
806 text.append(language_str);
807 text.append(RBRACKET_CHARACTER);
808 text.append(SPACE_CHARACTER);
809 //}
810 if(known_metadata != null) {
811 known_metadata.add(name_str);
812 }
813 language_str = null;
814 }
815 name_str = null;
816
817 String value_str = XMLTools.getValue(command_element);
818 // The value string we retrieved will be encoded for xml, so we now decode it - to text if text_value set. This parameter was originally show_extracted_namespace, but sincethis is only true for 'toString()' commands from within the CDM, its good enough to determine if this toString() will be used to display on screen, or write to collect.cfg
819 if(text_value == CollectionMeta.TEXT) {
820 value_str = Codec.transform(value_str, Codec.DOM_TO_TEXT);
821 }
822 else {
823 value_str = Codec.transform(value_str, Codec.DOM_TO_GREENSTONE);
824 }
825
826 // We don't wrap the email addresses in quotes, nor the other special metadata
827 if(special) {
828 text.append(value_str);
829 }
830 else {
831 text.append(SPEECH_CHARACTER);
832 text.append(value_str);
833 text.append(SPEECH_CHARACTER);
834 }
835 value_str = null;
836 return text.toString();
837 }
838
839 /** Parse a collect.cfg into a DOM model representation. */
840 private void parse(File collect_config_file) {
841 try {
842 ArrayList acquired_collectionmeta_names = null;
843 ArrayList obsolete_collectionmeta_names = null;
844 HashMap changed_collectionmeta_names = null;
845
846 // Life is made oh-so-more tricky by the existance of G2.39 config files. There are two ways to handle them:
847 // 1. Notice that the file is G2.39 from the start, then as I parse it magic it into G2.4 standard
848 // 2. Extend the parsing method to handle reading in G2.39, then afterwards go through the DOM changing it to G2.4 as appropriate.
849 // As far as I can tell the second option is twice as much work, so I'll try option 1. The problem here is that I have to determine if the 'buildtype' command is somewhere in the collect.cfg file, which means I'm going to have to read the file twice - once seaching for 'buildtype' and the second time to parse it.
850
851 // Search for 'buildtype mgpp'
852 InputStream input_stream_one = new FileInputStream(collect_config_file);
853 Reader reader_one = new InputStreamReader(input_stream_one, ENCODING);
854 BufferedReader buffered_reader_one = new BufferedReader(reader_one);
855 String search_line_str = null;
856 while(!is_twopointthreenine && (search_line_str = buffered_reader_one.readLine()) != null) {
857 if(search_line_str.toLowerCase().indexOf(BUILDTYPE_STR) != -1) {
858 is_twopointthreenine = true;
859 acquired_collectionmeta_names = new ArrayList();
860 changed_collectionmeta_names = new HashMap();
861 obsolete_collectionmeta_names = new ArrayList();
862 }
863 }
864 buffered_reader_one.close();
865 reader_one.close();
866 input_stream_one.close();
867 buffered_reader_one = null;
868 reader_one = null;
869 input_stream_one = null;
870
871 Element collect_cfg_element = document.getDocumentElement();
872 // Read in the file one command at a time.
873 InputStream istream = new FileInputStream(collect_config_file);
874 Reader in_reader = new InputStreamReader(istream, ENCODING);
875 BufferedReader in = new BufferedReader(in_reader);
876 String command_str = null;
877 while((command_str = in.readLine()) != null) {
878 boolean append_element = true;
879 Element command_element = null;
880 // A command may be broken over several lines.
881 command_str = command_str.trim();
882 boolean eof = false;
883 while(!eof && command_str.endsWith(NEWLINE_CHARACTER)) {
884 String next_line = in.readLine();
885 if(next_line != null) {
886 next_line = next_line.trim();
887 if(next_line.length() > 0) {
888 // Remove the new line character
889 command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
890 // And append the next line, which due to the test above must be non-zero length
891 command_str = command_str + next_line;
892 }
893 next_line = null;
894 }
895 // If we've reached the end of the file theres nothing more we can do
896 else {
897 eof = true;
898 }
899 }
900 // If there is still a new line character, then we remove it and hope for the best
901 if(command_str.endsWith(NEWLINE_CHARACTER)) {
902 command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
903 }
904 // Now we've either got a command to parse...
905 if(command_str.length() != 0) {
906 // Start trying to figure out what it is
907 //StringTokenizer tokenizer = new StringTokenizer(command_str);
908 // Instead of a standard string tokenizer I'm going to use the new version of CommandTokenizer, which is not only smart enough to correctly notice speech marks and correctly parse them out, but now also takes the input stream so it can rebuild tokens that stretch over several lines.
909 CommandTokenizer tokenizer = new CommandTokenizer(command_str, in);
910 String command_type = tokenizer.nextToken().toLowerCase();
911 // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
912 if(command_element == null && command_type.equals(BUILDTYPE_STR)) {
913 DebugStream.println("G2.39 Buildtype command detected. Ignoring.");
914 command_element = document.createElement(UNKNOWN_ELEMENT);
915 append_element = false;
916 }
917 if(command_element == null && command_type.equals(CLASSIFY_STR)) {
918 command_element = parseClassify(command_str);
919 }
920 if(command_element == null && command_type.equals(FORMAT_STR)) {
921 command_element = parseFormat(tokenizer); // Revised to handle multiple lines
922 }
923 if(command_element == null && (command_type.equals(INDEX_STR) || command_type.equals(COMMENTED_INDEXES_STR))) {
924 // If this was a G2.39 config file then we manipulate the command string a bit before we submit it to the parser. We start by adding allfields as the first index. We then space separate the remaining indexes, and remove duplicates when encountered. Of course before we do any of that we record the various space separated indexes so that we can remove the collection meta assigned to them.
925 if(is_twopointthreenine) {
926 DebugStream.println("G2.39 Index command detected. Modifying.");
927 DebugStream.println("Before: " + command_str);
928 StringBuffer new_command_str = new StringBuffer(command_type);
929 new_command_str.append(SPACE_CHARACTER);
930 new_command_str.append(ALLFIELDS_STR);
931 new_command_str.append(SPACE_CHARACTER);
932 ArrayList known_indexes = new ArrayList();
933 while(tokenizer.hasMoreTokens()) {
934 String old_index_str = tokenizer.nextToken();
935 // If this index is a combination of sources, then we need to remove the old collectionmeta, split up the compound index, then request new metadata be added for each part
936 if(old_index_str.indexOf(COMMA_CHARACTER) != -1) {
937 obsolete_collectionmeta_names.add(STOP_CHARACTER + old_index_str);
938 StringTokenizer string_tokenizer = new StringTokenizer(old_index_str, COMMA_CHARACTER);
939 while(string_tokenizer.hasMoreTokens()) {
940 String index_fragment_str = string_tokenizer.nextToken();
941 if(!known_indexes.contains(index_fragment_str)) {
942 known_indexes.add(index_fragment_str);
943 new_command_str.append(index_fragment_str);
944 new_command_str.append(SPACE_CHARACTER);
945 acquired_collectionmeta_names.add(STOP_CHARACTER + index_fragment_str);
946 }
947 index_fragment_str = null;
948 }
949 string_tokenizer = null;
950 }
951 // However if this was just a single index then a little choir of angels sing haleluja because we don't have to do -anything-. Nothing at all. Zip. Well no changes anyway. I obviously had to write this comment, and you can probably see, assuming you are not blind, that there are several lines of code below doing something, which is of course not nothing but something. And if we assume you are blind then you probably can't see the code, but then you probably didn't not see it doing the not nothing I said it would above.
952 else {
953 if(!known_indexes.contains(old_index_str)) {
954 known_indexes.add(old_index_str);
955 new_command_str.append(old_index_str);
956 new_command_str.append(SPACE_CHARACTER);
957 }
958 else {
959 // Use the collectionmeta for the single index instead of generating a default one
960 acquired_collectionmeta_names.remove(STOP_CHARACTER + old_index_str);
961 }
962 }
963 old_index_str = null;
964 }
965 known_indexes = null;
966 command_str = new_command_str.toString();
967 new_command_str = null;
968 DebugStream.println("After: " + command_str);
969 }
970 command_element = parseIndex(command_str);
971 }
972 if(command_element == null && (command_type.equals(INDEX_DEFAULT_STR) || command_type.equals(COMMENTED_INDEX_DEFAULT_STR))) {
973
974 command_element = parseIndexDefault(command_str);
975 // If this was a G2.39 config file then we squelch the default index (no such thing in G2.4)
976 if(is_twopointthreenine) {
977 DebugStream.println("G2.39 Default Index command detected. Ignoring.");
978 append_element = false;
979 }
980 }
981 if(command_element == null && command_type.equals(LANGUAGES_STR)) {
982 command_element = parseLanguage(command_str);
983 }
984 if(command_element == null && command_type.equals(LANGUAGE_DEFAULT_STR)) {
985 command_element = parseLanguageDefault(command_str);
986 }
987 if(command_element == null && (command_type.equals(LEVELS_STR) || command_type.equals(COMMENTED_LEVELS_STR))) {
988 // Again if this is G2.39 we have to do a tiny bit of magic to the levels command. We need to add document level, and change the remainder to lower case.
989 if(is_twopointthreenine) {
990 DebugStream.println("G2.39 Levels command detected. Modifying.");
991 DebugStream.println("Before: " + command_str);
992 StringBuffer new_command_str = new StringBuffer(command_type);
993 new_command_str.append(SPACE_CHARACTER);
994 new_command_str.append(DOCUMENT_STR);
995 while(tokenizer.hasMoreTokens()) {
996 String token = tokenizer.nextToken();
997 // Generate a lower case version
998 String token_lc = token.toLowerCase();
999 // If they are still the same then it is all good baby, otherwise we have to remember to transform their collectionmeta as well
1000 if(!token.equals(token_lc)) {
1001 changed_collectionmeta_names.put(STOP_CHARACTER + token, STOP_CHARACTER + token_lc);
1002 }
1003 new_command_str.append(SPACE_CHARACTER);
1004 new_command_str.append(token_lc);
1005 token_lc = null;
1006 token = null;
1007 }
1008 command_str = new_command_str.toString();
1009 new_command_str = null;
1010 DebugStream.println("After: " + command_str);
1011 }
1012 command_element = parseLevels(command_str);
1013 }
1014 if(command_element == null && command_type.equals(COLLECTIONMETADATA_STR)) {
1015 command_element = parseMetadata(tokenizer); // Revised to handle multiple lines
1016 }
1017 if(command_element == null && (/* command_type.equals(COLLECTIONMETADATA_BETA_STR) || */ command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) {
1018 command_element = parseMetadataSpecial(command_str);
1019 }
1020 if(command_element == null && command_type.equals(PLUGIN_STR)) {
1021 command_element = parsePlugIn(command_str);
1022 }
1023 if(command_element == null && command_type.equals(SEARCHTYPE_STR)) {
1024 command_element = parseSearchType(command_str);
1025 }
1026 if(command_element == null && command_type.equals(SUBCOLLECTION_STR)) {
1027 command_element = parseSubCollection(command_str);
1028 }
1029 if(command_element == null && command_type.equals(SUBCOLLECTION_DEFAULT_INDEX_STR)) {
1030 command_element = parseSubCollectionDefaultIndex(command_str);
1031 }
1032 if(command_element == null && command_type.equals(SUBCOLLECTION_INDEX_STR)) {
1033 command_element = parseSubCollectionIndex(command_str);
1034 }
1035 if(command_element == null && (command_type.equals(SUPERCOLLECTION_STR) || command_type.equals(CCS_STR))) {
1036 command_element = parseSuperCollection(command_str);
1037 }
1038 // Doesn't match any known type
1039 command_type = null;
1040 if(command_element == null) {
1041 // No-one knows what to do with this command, so we create an Unknown command element
1042 command_element = document.createElement(UNKNOWN_ELEMENT);
1043 XMLTools.setValue(command_element, command_str);
1044 }
1045 }
1046 // Or an empty line to remember for later
1047 else {
1048 command_element = document.createElement(NEWLINE_ELEMENT);
1049 }
1050 // Now command element shouldn't be null so we append it to the collection config DOM, but only if we haven't been told not to add it
1051 if(append_element) {
1052 collect_cfg_element.appendChild(command_element);
1053 }
1054 }
1055
1056 // We have completed parsing the collect configuration file. Now, if we are dealing with the G2.39 nightmare scenario, it's time to add the SearchType command and modify the collectionmeta commands as necessary.
1057 if(is_twopointthreenine) {
1058 Element search_type_element = getSearchType();
1059 search_type_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1060 while(search_type_element.hasChildNodes()) {
1061 search_type_element.removeChild(search_type_element.getFirstChild());
1062 }
1063 Element plain_search_type_element = document.createElement(CONTENT_ELEMENT);
1064 plain_search_type_element.setAttribute(NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[1]);
1065 search_type_element.appendChild(plain_search_type_element);
1066 plain_search_type_element = null;
1067 Element form_search_type_element = document.createElement(CONTENT_ELEMENT);
1068 form_search_type_element.setAttribute(NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]);
1069 search_type_element.appendChild(form_search_type_element);
1070 form_search_type_element = null;
1071 search_type_element = null;
1072
1073 // Search through the existing collectionmeta
1074 Element document_element = document.getDocumentElement();
1075 NodeList collectionmeta_elements = document_element.getElementsByTagName(COLLECTIONMETADATA_ELEMENT);
1076 DebugStream.println("There are " + obsolete_collectionmeta_names.size() + " collectionmeta to remove.");
1077 DebugStream.println("There are " + changed_collectionmeta_names.size() + " collectionmeta to change.");
1078 for(int z = collectionmeta_elements.getLength(); z > 0; z--) {
1079 Element collectionmeta_element = (Element) collectionmeta_elements.item(z - 1);
1080 String name = collectionmeta_element.getAttribute(NAME_ATTRIBUTE);
1081 DebugStream.println("Checking " + name);
1082 // Remove any obsolete metadata
1083 if(obsolete_collectionmeta_names.contains(name)) {
1084 DebugStream.println("G2.39 CollectMeta detected. Removing: " + name);
1085 document_element.removeChild(collectionmeta_element);
1086 }
1087 // We may have been asked to change the index name to lower case
1088 else if(changed_collectionmeta_names.containsKey(name)) {
1089 String new_name = (String) changed_collectionmeta_names.get(name);
1090 DebugStream.println("G2.39 CollectMeta detected. Changing: " + name + " -> " + new_name);
1091 collectionmeta_element.setAttribute(NAME_ATTRIBUTE, new_name);
1092 new_name = null;
1093 }
1094 name = null;
1095 }
1096
1097 // Finally add any newly acquired collectionmeta. This general defaults to the collectionmeta name less the full stop
1098 DebugStream.println("There are " + acquired_collectionmeta_names.size() + " collectionmeta to add.");
1099 for(int y = 0; y < acquired_collectionmeta_names.size(); y++) {
1100 String name = (String) acquired_collectionmeta_names.get(y);
1101 String value = name.substring(1);
1102 DebugStream.println("G2.39 CollectMeta missing. Adding: " + name + " [l=" + Configuration.getLanguage() + "] \"" + value + "\"");
1103 Element element = document.createElement(COLLECTIONMETADATA_ELEMENT);
1104 element.setAttribute(NAME_ATTRIBUTE, name);
1105 element.setAttribute(LANGUAGE_ATTRIBUTE, Configuration.getLanguage());
1106 element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1107 XMLTools.setValue(element, value);
1108 document_element.appendChild(element);
1109 element = null;
1110 value = null;
1111 name = null;
1112 }
1113
1114 document_element = null;
1115 }
1116 }
1117 catch(Exception exception) {
1118 DebugStream.println("Error in CollectionConfiguration.parse(java.io.File): " + exception);
1119 DebugStream.printStackTrace(exception);
1120 }
1121 }
1122
1123 private Element parseClassify(String command_str) {
1124 Element command_element = null;
1125 try {
1126 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1127 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1128 if(tokenizer.countTokens() >= 2) { // Must support "classify Phind" (no args)
1129 command_element = document.createElement(CLASSIFY_ELEMENT);
1130 // First token is classify
1131 tokenizer.nextToken();
1132 // The next token is the classifier type
1133 command_element.setAttribute(TYPE_ATTRIBUTE, tokenizer.nextToken());
1134 // Now we parse out the remaining arguments into a hashmapping from name to value
1135 HashMap arguments = parseArguments(tokenizer);
1136 // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
1137 Iterator names = arguments.keySet().iterator();
1138 while(names.hasNext()) {
1139 String name = (String) names.next();
1140 String value = (String) arguments.get(name); // Can be null
1141 // The metadata argument gets added as the content attribute
1142 if (name.equals(METADATA_ARGUMENT) && value != null) {
1143 // Add the extracted namespace onto un-namespaced metadata names
1144 StringTokenizer string_tokenizer = new StringTokenizer(value, ",");
1145 value = "";
1146 while (string_tokenizer.hasMoreElements()) {
1147 String token = (String) string_tokenizer.nextElement();
1148
1149 if (token.indexOf(StaticStrings.NS_SEP) == -1) {
1150 token = StaticStrings.EXTRACTED_NAMESPACE + token;
1151 }
1152 else {
1153 MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(token);
1154 if (metadata_element != null) {
1155 token = metadata_element.getDisplayName();
1156 }
1157 }
1158
1159 if (!value.equals("")) {
1160 value = value + ",";
1161 }
1162 value = value + token;
1163 }
1164 }
1165 // Everything else is an Option Element
1166 Element option_element = document.createElement(OPTION_ELEMENT);
1167 option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
1168 if(value != null) {
1169 // Remove any speech marks appended in strings containing whitespace
1170 if(value.startsWith(SPEECH_CHARACTER) && value.endsWith(SPEECH_CHARACTER)) {
1171 value = value.substring(1, value.length() - 1);
1172 }
1173 XMLTools.setValue(option_element, value);
1174 }
1175 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1176 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
1177 command_element.appendChild(option_element);
1178 option_element = null;
1179 name = null;
1180 value = null;
1181 }
1182 names = null;
1183 arguments = null;
1184 }
1185 tokenizer = null;
1186 }
1187 catch(Exception error) {
1188 }
1189 return command_element;
1190 }
1191
1192 private Element parseFormat(CommandTokenizer tokenizer) {
1193 Element command_element = null;
1194 try {
1195 command_element = document.createElement(FORMAT_ELEMENT);
1196 String name_str = tokenizer.nextToken();
1197 String value_str = tokenizer.nextToken();
1198 if(name_str != null && value_str != null) {
1199 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1200 // If the value is true or false we add it as an attribute
1201 if(value_str.equalsIgnoreCase(TRUE_STR) || value_str.equalsIgnoreCase(FALSE_STR)) {
1202 command_element.setAttribute(VALUE_ATTRIBUTE, value_str.toLowerCase());
1203 }
1204 // Otherwise it gets added as a text node
1205 else {
1206 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
1207 value_str = Codec.transform(value_str, Codec.GREENSTONE_TO_DOM);
1208 XMLTools.setValue(command_element, value_str);
1209 }
1210 }
1211 else {
1212 command_element = null;
1213 }
1214 name_str = null;
1215 value_str = null;
1216 }
1217 catch (Exception exception) {
1218 DebugStream.printStackTrace(exception);
1219 command_element = null;
1220 }
1221 return command_element;
1222 }
1223
1224 private Element parseIndex(String command_str) {
1225 Element command_element = null;
1226 try {
1227 StringTokenizer tokenizer = new StringTokenizer(command_str);
1228 String command = tokenizer.nextToken();
1229 command_element = document.createElement(INDEXES_ELEMENT);
1230 command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(INDEX_STR) ? TRUE_STR : FALSE_STR));
1231 command = null;
1232 if(!tokenizer.hasMoreTokens()) {
1233
1234 // there are no indexes
1235 command_element.setAttribute(ASSIGNED_ATTRIBUTE, FALSE_STR);
1236 command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR); // for now
1237 tokenizer = null;
1238 return command_element;
1239 }
1240
1241 while(tokenizer.hasMoreTokens()) {
1242 Element index_element = document.createElement(INDEX_ELEMENT);
1243 String index_str = tokenizer.nextToken();
1244 // There are two types of index we have to consider. Old G2.38 and earlier use level:source tuplets while G2.39+ have just a single, non-comma separated list where order is important.
1245 boolean old_index;
1246 if(index_str.indexOf(COLON_CHARACTER) != -1) {
1247 old_index = true;
1248 index_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
1249 index_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
1250 command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR);
1251 }
1252 else {
1253 command_element.setAttribute(MGPP_ATTRIBUTE, TRUE_STR);
1254 old_index = false;
1255 }
1256 StringTokenizer content_tokenizer = new StringTokenizer(index_str, StaticStrings.COMMA_CHARACTER);
1257 while(content_tokenizer.hasMoreTokens()) {
1258 Element content_element = document.createElement(CONTENT_ELEMENT);
1259 String content_str = content_tokenizer.nextToken();
1260 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1261 if(content_str.indexOf(StaticStrings.NS_SEP) == -1) {
1262 if(content_str.equals(StaticStrings.TEXT_STR) || (!old_index && content_str.equals(StaticStrings.ALLFIELDS_STR))) {
1263 // Our special strings are OK.
1264 }
1265 else {
1266 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
1267 }
1268 }
1269 content_element.setAttribute(NAME_ATTRIBUTE, content_str);
1270 index_element.appendChild(content_element);
1271 content_element = null;
1272 }
1273 content_tokenizer = null;
1274 index_str = null;
1275 command_element.appendChild(index_element);
1276 index_element = null;
1277 }
1278 tokenizer = null;
1279 }
1280 catch (Exception exception) {
1281 exception.printStackTrace();
1282 }
1283 return command_element;
1284 }
1285
1286 private Element parseIndexDefault(String command_str) {
1287 Element command_element = null;
1288 try {
1289 StringTokenizer tokenizer = new StringTokenizer(command_str);
1290 if(tokenizer.countTokens() >= 2) {
1291 command_element = document.createElement(INDEX_DEFAULT_ELEMENT);
1292 command_element.setAttribute(ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken()).equals(INDEX_DEFAULT_STR) ? TRUE_STR : FALSE_STR));
1293 String index_str = tokenizer.nextToken();
1294 command_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
1295 String content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
1296 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1297 while(content_tokenizer.hasMoreTokens()) {
1298 Element content_element = document.createElement(CONTENT_ELEMENT);
1299 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1300 command_element.appendChild(content_element);
1301 content_element = null;
1302 }
1303 content_tokenizer = null;
1304 content_str = null;
1305 content_str = null;
1306 index_str = null;
1307 }
1308 tokenizer = null;
1309 }
1310 catch (Exception exception) {
1311 }
1312 return command_element;
1313 }
1314
1315 private Element parseLanguage(String command_str) {
1316 Element command_element = null;
1317 try {
1318 StringTokenizer tokenizer = new StringTokenizer(command_str);
1319 tokenizer.nextToken();
1320 if(tokenizer.hasMoreTokens()) {
1321 command_element = document.createElement(LANGUAGES_ELEMENT);
1322 while(tokenizer.hasMoreTokens()) {
1323 Element language_element = document.createElement(LANGUAGE_ELEMENT);
1324 language_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1325 command_element.appendChild(language_element);
1326 language_element = null;
1327 }
1328 }
1329 tokenizer = null;
1330 }
1331 catch (Exception exception) {
1332 }
1333 return command_element;
1334 }
1335
1336 private Element parseLanguageDefault(String command_str) {
1337 Element command_element = null;
1338 try {
1339 StringTokenizer tokenizer = new StringTokenizer(command_str);
1340 if(tokenizer.countTokens() >= 2) {
1341 command_element = document.createElement(LANGUAGE_DEFAULT_ELEMENT);
1342 tokenizer.nextToken();
1343 String default_language_str = tokenizer.nextToken();
1344 command_element.setAttribute(NAME_ATTRIBUTE, default_language_str);
1345 command_element.setAttribute(ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1346 default_language_str = null;
1347 }
1348 tokenizer = null;
1349 }
1350 catch (Exception exception) {
1351 }
1352 return command_element;
1353 }
1354
1355 private Element parseLevels(String command_str) {
1356 Element command_element = null;
1357 try {
1358 StringTokenizer tokenizer = new StringTokenizer(command_str);
1359 // First token is command type (levels)
1360 String command = tokenizer.nextToken();
1361 if(tokenizer.hasMoreTokens()) {
1362 command_element = document.createElement(LEVELS_ELEMENT);
1363 command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(LEVELS_STR) ? TRUE_STR : FALSE_STR));
1364 while(tokenizer.hasMoreTokens()) {
1365 Element level_element = document.createElement(CONTENT_ELEMENT);
1366 level_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1367 command_element.appendChild(level_element);
1368 level_element = null;
1369 }
1370 }
1371 command = null;
1372 }
1373 catch(Exception exception) {
1374 }
1375 return command_element;
1376 }
1377
1378 private Element parseMetadata(CommandTokenizer tokenizer) {
1379 Element command_element = null;
1380 try {
1381 command_element = document.createElement(COLLECTIONMETADATA_ELEMENT);
1382 String name_str = tokenizer.nextToken();
1383 String value_str = tokenizer.nextToken();
1384 if(name_str != null && value_str != null) {
1385 String language_str = Configuration.getLanguage();
1386 // Check if the value string is actually a language string
1387 if(value_str.startsWith(LBRACKET_CHARACTER) && value_str.endsWith(RBRACKET_CHARACTER)) {
1388 language_str = value_str.substring(value_str.indexOf(LANGUAGE_ARGUMENT) + 2, value_str.length() - 1);
1389 value_str = tokenizer.nextToken();
1390 }
1391 if(value_str != null) {
1392 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
1393 value_str = Codec.transform(value_str, Codec.GREENSTONE_TO_DOM);
1394 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1395 command_element.setAttribute(LANGUAGE_ATTRIBUTE, language_str);
1396 command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1397 XMLTools.setValue(command_element, value_str);
1398 }
1399 else {
1400 command_element = null;
1401 }
1402 language_str = null;
1403 }
1404 else {
1405 command_element = null;
1406 }
1407 name_str = null;
1408 value_str = null;
1409 }
1410 catch (Exception exception) {
1411 DebugStream.printStackTrace(exception);
1412 command_element = null;
1413 }
1414 return command_element;
1415 }
1416
1417 private Element parseMetadataSpecial(String command_str) {
1418 Element command_element = null;
1419 try {
1420 StringTokenizer tokenizer = new StringTokenizer(command_str);
1421 if(tokenizer.countTokens() >= 2) {
1422 String name_str = tokenizer.nextToken();
1423 String value_str = tokenizer.nextToken();
1424 // if(name_str.equals(COLLECTIONMETADATA_BETA_STR)) {
1425 // command_element = document.createElement(COLLECTIONMETADATA_BETA_ELEMENT);
1426 // }
1427 if (name_str.equals(COLLECTIONMETADATA_CREATOR_STR)) {
1428 command_element = document.createElement(COLLECTIONMETADATA_CREATOR_ELEMENT);
1429 }
1430 else if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
1431 command_element = document.createElement(COLLECTIONMETADATA_MAINTAINER_ELEMENT);
1432 }
1433 else if(name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
1434 command_element = document.createElement(COLLECTIONMETADATA_PUBLIC_ELEMENT);
1435 }
1436 if(command_element != null) {
1437 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1438 command_element.setAttribute(LANGUAGE_ATTRIBUTE, ENGLISH_LANGUAGE_STR);
1439 command_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
1440 command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1441 if(value_str.startsWith(SPEECH_CHARACTER) && value_str.endsWith(SPEECH_CHARACTER)) {
1442 value_str = value_str.substring(1, value_str.length() - 1);
1443 }
1444 XMLTools.setValue(command_element, value_str);
1445 }
1446 value_str = null;
1447 name_str = null;
1448 }
1449 tokenizer = null;
1450 }
1451 catch (Exception exception) {
1452 }
1453 return command_element;
1454 }
1455
1456 private Element parsePlugIn(String command_str) {
1457 Element command_element = null;
1458 boolean use_metadata_files = false;
1459 try {
1460 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1461 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1462 if(tokenizer.countTokens() >= 2) {
1463 command_element = document.createElement(PLUGIN_ELEMENT);
1464 // First token is plugin
1465 tokenizer.nextToken();
1466 // The next token is the type
1467 String type = tokenizer.nextToken();
1468 command_element.setAttribute(TYPE_ATTRIBUTE, type);
1469 // Now we parse out the remaining arguments into a hashmapping from name to value
1470 HashMap arguments = parseArguments(tokenizer);
1471 // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
1472 Iterator names = arguments.keySet().iterator();
1473 while(names.hasNext()) {
1474 String name = (String) names.next();
1475 String value = (String) arguments.get(name); // Can be null
1476 Element option_element = document.createElement(OPTION_ELEMENT);
1477 if(name.substring(1).equals(USE_METADATA_FILES_ARGUMENT)) {
1478 use_metadata_files = true;
1479 }
1480 option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
1481 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1482 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); // All arguments are considered to be custom until matched against base plugins arguments
1483 if(value != null) {
1484 // Remove any speech marks appended in strings containing whitespace
1485 if(value.startsWith(SPEECH_CHARACTER) && value.endsWith(SPEECH_CHARACTER)) {
1486 value = value.substring(1, value.length() - 1);
1487 }
1488 if(name.equals(METADATA_ARGUMENT)) {
1489 // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace.
1490 if(value.indexOf(StaticStrings.NS_SEP) == -1) {
1491 value = StaticStrings.EXTRACTED_NAMESPACE + value;
1492 }
1493 }
1494 XMLTools.setValue(option_element, value);
1495 }
1496 command_element.appendChild(option_element);
1497 option_element = null;
1498 name = null;
1499 value = null;
1500 }
1501
1502 // We must have some RecPlug options: use_metadata_files, and show_progress
1503 if (type.equals(RECPLUG_STR)) {
1504 if (!use_metadata_files) {
1505 Element option_element = document.createElement(OPTION_ELEMENT);
1506 option_element.setAttribute(NAME_ATTRIBUTE, USE_METADATA_FILES_ARGUMENT);
1507 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1508 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
1509 command_element.appendChild(option_element);
1510 option_element = null;
1511 }
1512 }
1513 type = null;
1514 names = null;
1515 arguments = null;
1516 }
1517 tokenizer = null;
1518 }
1519 catch(Exception exception) {
1520 }
1521 return command_element;
1522 }
1523
1524 private Element parseSearchType(String command_str) {
1525 Element command_element = null;
1526 try {
1527 StringTokenizer tokenizer = new StringTokenizer(command_str);
1528 // First token is command type (levels)
1529 tokenizer.nextToken();
1530 if(tokenizer.hasMoreTokens()) {
1531 command_element = document.createElement(SEARCHTYPE_ELEMENT);
1532 command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1533 while(tokenizer.hasMoreTokens()) {
1534 Element search_element = document.createElement(CONTENT_ELEMENT);
1535 search_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1536 command_element.appendChild(search_element);
1537 search_element = null;
1538 }
1539 }
1540 }
1541 catch(Exception exception) {
1542 }
1543 return command_element;
1544 }
1545
1546 private Element parseSubCollection(String command_str) {
1547 Element command_element = null;
1548 try {
1549 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1550 if(tokenizer.countTokens() >= 3) {
1551 command_element = document.createElement(SUBCOLLECTION_ELEMENT);
1552 // First token is command type
1553 tokenizer.nextToken();
1554 // Then subcollection identifier
1555 command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1556 // Then finally the pattern used to build the subcollection partition
1557 String full_pattern_str = tokenizer.nextToken();
1558 // Set inclusion/exclusion flag and remove any exclamation mark
1559 boolean exclusion = full_pattern_str.startsWith(EXCLAMATION_CHARACTER);
1560 if (exclusion) {
1561 full_pattern_str = full_pattern_str.substring(1, full_pattern_str.length());
1562 command_element.setAttribute(TYPE_ATTRIBUTE, EXCLUDE_STR);
1563 }
1564 else {
1565 command_element.setAttribute(TYPE_ATTRIBUTE, INCLUDE_STR);
1566 }
1567 StringTokenizer pattern_tokenizer = new StringTokenizer(full_pattern_str, SEPARATOR_CHARACTER);
1568 if(pattern_tokenizer.countTokens() >= 2) {
1569 String content_str = pattern_tokenizer.nextToken();
1570 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1571 if(!content_str.equals(StaticStrings.FILENAME_STR) && content_str.indexOf(StaticStrings.NS_SEP) == -1) {
1572 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
1573 }
1574 command_element.setAttribute(CONTENT_ATTRIBUTE, content_str);
1575 XMLTools.setValue(command_element, pattern_tokenizer.nextToken());
1576 if(pattern_tokenizer.hasMoreTokens()) {
1577 command_element.setAttribute(OPTIONS_ATTRIBUTE, pattern_tokenizer.nextToken());
1578 }
1579 }
1580 pattern_tokenizer = null;
1581 }
1582 }
1583 catch(Exception exception) {
1584 exception.printStackTrace();
1585 }
1586 return command_element;
1587 }
1588
1589 private Element parseSubCollectionDefaultIndex(String command_str) {
1590 Element command_element = null;
1591 try {
1592 StringTokenizer tokenizer = new StringTokenizer(command_str);
1593 if(tokenizer.countTokens() == 2) {
1594 command_element = document.createElement(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT);
1595 tokenizer.nextToken();
1596 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1597 String content_str = tokenizer.nextToken();
1598 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1599 while(content_tokenizer.hasMoreTokens()) {
1600 Element content_element = document.createElement(CONTENT_ELEMENT);
1601 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1602 command_element.appendChild(content_element);
1603 content_element = null;
1604 }
1605 content_tokenizer = null;
1606 content_str = null;
1607 }
1608 tokenizer = null;
1609 }
1610 catch(Exception exception) {
1611 }
1612 return command_element;
1613 }
1614
1615 private Element parseSubCollectionIndex(String command_str) {
1616 Element command_element = null;
1617 try {
1618 StringTokenizer tokenizer = new StringTokenizer(command_str);
1619 tokenizer.nextToken();
1620 if(tokenizer.hasMoreTokens()) {
1621 command_element = document.createElement(SUBCOLLECTION_INDEXES_ELEMENT);
1622 }
1623 while(tokenizer.hasMoreTokens()) {
1624 Element subcollectionindex_element = document.createElement(INDEX_ELEMENT);
1625 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1626 String content_str = tokenizer.nextToken();
1627 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1628 while(content_tokenizer.hasMoreTokens()) {
1629 Element content_element = document.createElement(CONTENT_ELEMENT);
1630 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1631 subcollectionindex_element.appendChild(content_element);
1632 content_element = null;
1633 }
1634 content_tokenizer = null;
1635 content_str = null;
1636 command_element.appendChild(subcollectionindex_element);
1637 subcollectionindex_element = null;
1638 }
1639 tokenizer = null;
1640 }
1641 catch (Exception exception) {
1642 }
1643 return command_element;
1644 }
1645
1646 private Element parseSuperCollection(String command_str) {
1647 Element command_element = null;
1648 try {
1649 StringTokenizer tokenizer = new StringTokenizer(command_str);
1650 if(tokenizer.countTokens() >= 3) {
1651 command_element = document.createElement(SUPERCOLLECTION_ELEMENT);
1652 tokenizer.nextToken();
1653 while(tokenizer.hasMoreTokens()) {
1654 Element collection_element = document.createElement(COLLECTION_ELEMENT);
1655 collection_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1656 command_element.appendChild(collection_element);
1657 collection_element = null;
1658 }
1659 }
1660 tokenizer = null;
1661 }
1662 catch(Exception exception) {
1663 }
1664 return command_element;
1665 }
1666
1667 private String pluginToString(Element command_element, boolean show_extracted_namespace) {
1668 StringBuffer text = new StringBuffer();
1669 if(!command_element.getAttribute(SEPARATOR_ATTRIBUTE).equals(TRUE_STR)) {
1670 text.append(PLUGIN_STR);
1671 text.append(TAB_CHARACTER);
1672 text.append(TAB_CHARACTER);
1673 text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
1674 // Retrieve, and output, the arguments
1675 NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
1676 int option_elements_length = option_elements.getLength();
1677 if(option_elements_length > 0) {
1678 text.append(SPACE_CHARACTER);
1679 for(int j = 0; j < option_elements_length; j++) {
1680 Element option_element = (Element) option_elements.item(j);
1681 if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1682 text.append(StaticStrings.MINUS_CHARACTER);
1683 text.append(option_element.getAttribute(NAME_ATTRIBUTE));
1684 String value_str = XMLTools.getValue(option_element);
1685 if(!show_extracted_namespace && value_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
1686 value_str = value_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
1687 }
1688 if(value_str.length() > 0) {
1689 text.append(SPACE_CHARACTER);
1690 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
1691 text.append(value_str);
1692 }
1693 else {
1694 text.append(SPEECH_CHARACTER);
1695 text.append(value_str);
1696 text.append(SPEECH_CHARACTER);
1697 }
1698 }
1699 value_str = null;
1700 if(j < option_elements_length - 1) {
1701 text.append(SPACE_CHARACTER);
1702 }
1703 }
1704 option_element = null;
1705 }
1706 }
1707 option_elements = null;
1708 }
1709 return text.toString();
1710 }
1711
1712 private String searchtypeToString(Element command_element) {
1713 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1714 StringBuffer text = new StringBuffer(SEARCHTYPE_STR);
1715 text.append(TAB_CHARACTER);
1716 NodeList search_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1717 int search_elements_length = search_elements.getLength();
1718 for(int i = 0; i < search_elements_length; i++) {
1719 Element search_element = (Element) search_elements.item(i);
1720 text.append(search_element.getAttribute(NAME_ATTRIBUTE));
1721 text.append(SPACE_CHARACTER);
1722 }
1723 return text.substring(0, text.length() - 1);
1724 }
1725 else {
1726 return null;
1727 }
1728 }
1729
1730 private String subcollectionToString(Element command_element, boolean show_extracted_namespace) {
1731 StringBuffer text = new StringBuffer(SUBCOLLECTION_STR);
1732 text.append(SPACE_CHARACTER);
1733 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
1734 text.append(SPACE_CHARACTER);
1735 text.append(TAB_CHARACTER);
1736 text.append(SPEECH_CHARACTER);
1737 if(command_element.getAttribute(TYPE_ATTRIBUTE).equals(EXCLUDE_STR)) {
1738 text.append(EXCLAMATION_CHARACTER);
1739 }
1740 String content_str = command_element.getAttribute(CONTENT_ATTRIBUTE);
1741 if(!show_extracted_namespace && content_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
1742 content_str = content_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
1743 }
1744 text.append(content_str);
1745 content_str = null;
1746 text.append(SEPARATOR_CHARACTER);
1747 text.append(XMLTools.getValue(command_element));
1748 text.append(SEPARATOR_CHARACTER);
1749 String options_str = command_element.getAttribute(OPTIONS_ATTRIBUTE);
1750 if(options_str.length() > 0) {
1751 text.append(options_str);
1752 }
1753 options_str = null;
1754 text.append(SPEECH_CHARACTER);
1755 return text.toString();
1756 }
1757
1758 private String subcollectionDefaultIndexToString(Element command_element) {
1759 StringBuffer text = new StringBuffer(SUBCOLLECTION_DEFAULT_INDEX_STR);
1760 text.append(TAB_CHARACTER);
1761 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1762 int content_elements_length = content_elements.getLength();
1763 for(int j = 0; j < content_elements_length; j++) {
1764 Element content_element = (Element) content_elements.item(j);
1765 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1766 if(j < content_elements_length - 1) {
1767 text.append(StaticStrings.COMMA_CHARACTER);
1768 }
1769 }
1770 return text.toString();
1771 }
1772
1773 private String subcollectionIndexesToString(Element command_element) {
1774 StringBuffer text = new StringBuffer(SUBCOLLECTION_INDEX_STR);
1775 text.append(TAB_CHARACTER);
1776 // Retrieve all of the subcollection index partitions
1777 NodeList subcollectionindex_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
1778 int subcollectionindex_elements_length = subcollectionindex_elements.getLength();
1779 if(subcollectionindex_elements_length == 0) {
1780 return null;
1781 }
1782 for(int j = 0; j < subcollectionindex_elements_length; j++) {
1783 Element subcollectionindex_element = (Element) subcollectionindex_elements.item(j);
1784 NodeList content_elements = subcollectionindex_element.getElementsByTagName(CONTENT_ELEMENT);
1785 int content_elements_length = content_elements.getLength();
1786 for(int k = 0; k < content_elements_length; k++) {
1787 Element content_element = (Element) content_elements.item(k);
1788 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1789 if(k < content_elements_length - 1) {
1790 text.append(StaticStrings.COMMA_CHARACTER);
1791 }
1792 }
1793 if(j < subcollectionindex_elements_length - 1) {
1794 text.append(SPACE_CHARACTER);
1795 }
1796 }
1797 return text.toString();
1798 }
1799
1800 private String supercollectionToString(Element command_element) {
1801 NodeList content_elements = command_element.getElementsByTagName(COLLECTION_ELEMENT);
1802 int content_elements_length = content_elements.getLength();
1803 if(content_elements_length > 1) {
1804 StringBuffer text = new StringBuffer(SUPERCOLLECTION_STR);
1805 text.append(TAB_CHARACTER);
1806 for(int j = 0; j < content_elements_length; j++) {
1807 Element content_element = (Element) content_elements.item(j);
1808 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1809 if(j < content_elements_length - 1) {
1810 text.append(SPACE_CHARACTER);
1811 }
1812 }
1813 return text.toString();
1814 }
1815 return null;
1816 }
1817
1818 private String unknownToString(Element command_element) {
1819 return XMLTools.getValue(command_element);
1820 }
1821
1822 /** Write the text to the buffer. This is used so we don't have to worry about storing intermediate String values just so we can calaulate length and offset.
1823 * @param writer the BufferedWriter to which the str will be written
1824 * @param str the String to be written
1825 */
1826 private void write(BufferedWriter writer, String str)
1827 throws IOException {
1828 writer.write(str, 0, str.length());
1829 }
1830}
Note: See TracBrowser for help on using the repository browser.