source: trunk/gli/src/org/greenstone/gatherer/cdm/CollectionConfiguration.java@ 8313

Last change on this file since 8313 was 8313, checked in by mdewsnip, 20 years ago

Finally committing the (many) changes to the GLI to use the new metadata code... I hope this doesn't have too many bugs in it and committing it now doesn't stuff anyone up! (Katherine said I could commit it, so blame her if anything goes wrong).

  • Property svn:keywords set to Author Date Id Revision
File size: 78.1 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Author: John Thompson, Greenstone Digital Library, University of Waikato
9 *
10 * Copyright (C) 1999 New Zealand Digital Library Project
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *########################################################################
26 */
27package org.greenstone.gatherer.cdm;
28
29import java.awt.*;
30import java.awt.event.*;
31import java.io.*;
32import java.util.*;
33import javax.swing.*;
34import org.greenstone.gatherer.Configuration;
35import org.greenstone.gatherer.DebugStream;
36import org.greenstone.gatherer.Gatherer;
37import org.greenstone.gatherer.gui.GLIButton;
38import org.greenstone.gatherer.metadata.MetadataElement;
39import org.greenstone.gatherer.metadata.MetadataTools;
40import org.greenstone.gatherer.util.DOMTree;
41import org.greenstone.gatherer.util.Codec;
42import org.greenstone.gatherer.util.StaticStrings;
43import org.greenstone.gatherer.util.Utility;
44import org.greenstone.gatherer.util.XMLTools;
45import org.w3c.dom.*;
46
47/** This class provides either access to a pseudo-G3 document, or parses a collect.cfg file in such a way as to provide an XML-type view of its content. The latter form is useful as it allows the manipulation and free-form editing of a legacy collect.cfg file while still allowing the various CDM data managers to base themselves directly on this model (whereas they used to be independent ListModels which clobbered the ordering of unparsed commands).
48 * @author John Thompson, Greenstone Digital Library, University of Waikato
49 * @version 2.3d
50 */
51public class CollectionConfiguration
52 extends StaticStrings {
53
54 static final public String ENCODING = "UTF-8";
55
56 static public Document document;
57
58 static public void main(String[] args) {
59 if(args.length >= 1) {
60 File file = new File(args[0]);
61 CollectionConfiguration collect_cfg = new CollectionConfiguration(file);
62 collect_cfg.save(true);
63 collect_cfg.save(false);
64 collect_cfg = null;
65 }
66 else {
67 System.out.println("Usage: CollectionConfiguration <filename>");
68 }
69 }
70
71 /** Find the best insertion position for the given DOM Element. This should try to match command tag, and if found should then try to group by name or type (eg CollectionMeta), or append to end is no such grouping exists (eg PlugIns). Failing a command match it will check against the command order for the best insertion location.
72 * @param target_element the command Element to be inserted
73 * @return the Element which the given command should be inserted before, or null to append to end of list
74 */
75 static public Node findInsertionPoint(Element target_element) {
76 ///ystem.err.println("Find insertion point: " + target_element.getNodeName());
77 String target_element_name = target_element.getNodeName();
78 Element document_element = document.getDocumentElement();
79 // Try to find commands with the same tag.
80 NodeList matching_elements = document_element.getElementsByTagName(target_element_name);
81 // If we found matching elements, then we have our most likely insertion location, so check within for groupings
82 if(matching_elements.getLength() != 0) {
83 ///ystem.err.println("Found matching elements.");
84 // Only CollectionMeta are grouped.
85 if(target_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
86 ///ystem.err.println("Dealing with collection metadata");
87 // Special case: CollectionMeta can be added at either the start or end of a collection configuration file. However the start position is reserved for special metadata, so if no non-special metadata can be found we must append to the end.
88 // So if the command to be added is special add it immediately after any other special command
89 if(target_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
90 int index = 0;
91 Element matched_element = (Element) matching_elements.item(index);
92 Element sibling_element = (Element) matched_element.getNextSibling();
93 while(sibling_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
94 index++;
95 matched_element = (Element) matching_elements.item(index);
96 sibling_element = (Element) matched_element.getNextSibling();
97 }
98 if(sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
99 Element newline_element = document.createElement(NEWLINE_ELEMENT);
100 document_element.insertBefore(newline_element, sibling_element);
101 }
102 return sibling_element;
103 }
104 // Otherwise try to find a matching 'name' and add after the last one in that group.
105 else {
106 int index = 0;
107 target_element_name = target_element.getAttribute(NAME_ATTRIBUTE);
108 boolean found = false;
109 // Skip all of the special metadata
110 Element matched_element = (Element) matching_elements.item(index);
111 while(matched_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
112 index++;
113 matched_element = (Element) matching_elements.item(index);
114 }
115 // Begin search
116 while(!found && matched_element != null) {
117 if(matched_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
118 found = true;
119 }
120 else {
121 index++;
122 matched_element = (Element) matching_elements.item(index);
123 }
124 }
125 // If we found a match, we need to continue checking until we find the last name match.
126 if(found) {
127 index++;
128 Element previous_sibling = matched_element;
129 Element sibling_element = (Element) matching_elements.item(index);
130 while(sibling_element != null && sibling_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
131 previous_sibling = sibling_element;
132 index++;
133 sibling_element = (Element) matching_elements.item(index);
134 }
135 // Previous sibling now holds the command immediately before where we want to add, so find its next sibling and add to that. In this one case we can ignore new lines!
136 return previous_sibling.getNextSibling();
137 }
138 // If not found we just add after last metadata element
139 else {
140 Element last_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
141 return last_element.getNextSibling();
142 }
143 }
144
145 }
146 else {
147 ///ystem.err.println("Not dealing with collection meta.");
148 Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
149 // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
150 Node sibling_element = matched_element.getNextSibling();
151 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
152 Element newline_element = document.createElement(NEWLINE_ELEMENT);
153 document_element.insertBefore(newline_element, sibling_element);
154 }
155 return sibling_element; // Note that this may be null
156 }
157 }
158 ///ystem.err.println("No matching elements found.");
159 // Locate where this command is in the ordering
160 int command_index = -1;
161 for(int i = 0; command_index == -1 && i < COMMAND_ORDER.length; i++) {
162 if(COMMAND_ORDER[i].equals(target_element_name)) {
163 command_index = i;
164 }
165 }
166 ///ystem.err.println("Command index is: " + command_index);
167 // Now move forward, checking for existing elements in each of the preceeding command orders.
168 int preceeding_index = command_index - 1;
169 ///ystem.err.println("Searching before the target command.");
170 while(preceeding_index >= 0) {
171 matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[preceeding_index]);
172 // If we've found a match
173 if(matching_elements.getLength() > 0) {
174 // We add after the last element
175 Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
176 // One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
177 Node sibling_element = matched_element.getNextSibling();
178 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
179 Element newline_element = document.createElement(NEWLINE_ELEMENT);
180 document_element.insertBefore(newline_element, sibling_element);
181 }
182 return sibling_element; // Note that this may be null
183 }
184 preceeding_index--;
185 }
186 // If all that fails, we now move backwards through the commands
187 int susceeding_index = command_index + 1;
188 ///ystem.err.println("Searching after the target command.");
189 while(susceeding_index < COMMAND_ORDER.length) {
190 matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[susceeding_index]);
191 // If we've found a match
192 if(matching_elements.getLength() > 0) {
193 // We add before the first element
194 Element matched_element = (Element) matching_elements.item(0);
195 // One final quick test. If the matched element is immediately preceeded by a NewLine command, then we insert another NewLine before the matched command, then return this new NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
196 Node sibling_element = matched_element.getPreviousSibling();
197 if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
198 Element newline_element = document.createElement(NEWLINE_ELEMENT);
199 document_element.insertBefore(newline_element, sibling_element);
200 }
201 return sibling_element; // Note that this may be null
202 }
203 susceeding_index++;
204 }
205 // Well. Apparently there are no other commands in this collection configuration. So append away...
206 return null;
207 }
208
209 static public String toString(Element command_element, boolean show_extracted_namespace) {
210 String command_element_name = command_element.getNodeName();
211 if(command_element_name.equals(CLASSIFY_ELEMENT)) {
212 return self.classifyToString(command_element, show_extracted_namespace);
213 }
214 else if(command_element_name.equals(FORMAT_ELEMENT)) {
215 return self.formatToString(command_element, show_extracted_namespace);
216 }
217 else if(command_element_name.equals(INDEXES_ELEMENT)) {
218 return self.indexesToString(command_element, show_extracted_namespace);
219 }
220 else if(command_element_name.equals(INDEX_DEFAULT_ELEMENT)) {
221 return self.indexDefaultToString(command_element, show_extracted_namespace);
222 }
223 else if(command_element_name.equals(LANGUAGES_ELEMENT)) {
224 return self.languagesToString(command_element);
225 }
226 else if(command_element_name.equals(LANGUAGE_DEFAULT_ELEMENT)) {
227 return self.languageDefaultToString(command_element);
228 }
229 else if(command_element_name.equals(LEVELS_ELEMENT)) {
230 return self.levelsToString(command_element);
231 }
232 else if(command_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
233 return self.metadataToString(command_element, show_extracted_namespace);
234 }
235 else if(command_element_name.equals(COLLECTIONMETADATA_CREATOR_ELEMENT)) {
236 return self.metadataToString(command_element, show_extracted_namespace);
237 }
238 else if(command_element_name.equals(COLLECTIONMETADATA_MAINTAINER_ELEMENT)) {
239 return self.metadataToString(command_element, show_extracted_namespace);
240 }
241 else if(command_element_name.equals(COLLECTIONMETADATA_PUBLIC_ELEMENT)) {
242 return self.metadataToString(command_element, show_extracted_namespace);
243 }
244 // else if(command_element_name.equals(COLLECTIONMETADATA_BETA_ELEMENT)) {
245 // return self.metadataToString(command_element, show_extracted_namespace);
246 // }
247 else if(command_element_name.equals(PLUGIN_ELEMENT)) {
248 return self.pluginToString(command_element, show_extracted_namespace);
249 }
250 else if(command_element_name.equals(SEARCHTYPE_ELEMENT)) {
251 return self.searchtypeToString(command_element);
252 }
253 else if(command_element_name.equals(SUBCOLLECTION_ELEMENT)) {
254 return self.subcollectionToString(command_element, show_extracted_namespace);
255 }
256 else if(command_element_name.equals(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) {
257 return self.subcollectionDefaultIndexToString(command_element);
258 }
259 else if(command_element_name.equals(SUBCOLLECTION_INDEXES_ELEMENT)) {
260 return self.subcollectionIndexesToString(command_element);
261 }
262 else if(command_element_name.equals(SUPERCOLLECTION_ELEMENT)) {
263 return self.supercollectionToString(command_element);
264 }
265 else if(command_element_name.equals(UNKNOWN_ELEMENT)) {
266 return self.unknownToString(command_element);
267 }
268 return "";
269 }
270
271 /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix.
272 * @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string
273 * @return a HashMap containing the arguments parsed
274 */
275 static public HashMap parseArguments(CommandTokenizer tokenizer) {
276 HashMap arguments = new HashMap();
277 String name = null;
278 String value = null;
279 while(tokenizer.hasMoreTokens() || name != null) {
280 // First we retrieve a name if we need one.
281 if(name == null) {
282 name = tokenizer.nextToken();
283 }
284 // Now we attempt to retrieve a value
285 if(tokenizer.hasMoreTokens()) {
286 value = tokenizer.nextToken();
287 // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop.
288 if(value.startsWith(StaticStrings.MINUS_CHARACTER)) {
289 arguments.put(name, null);
290 name = value;
291 }
292 // Otherwise we have a typical name->value pair ready to go
293 else {
294 arguments.put(name, value);
295 name = null;
296 }
297 }
298 // Otherwise its a binary flag
299 else {
300 arguments.put(name, null);
301 name = null;
302 }
303 }
304 return arguments;
305 }
306
307 static private ArrayList known_metadata;
308
309 static private CollectionConfiguration self;
310
311 /** Gives the preferred ordering of commands */
312 static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, /* StaticStrings.COLLECTIONMETADATA_BETA_ELEMENT, */ StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};
313
314 /** ************************** Public Data Members ***************************/
315
316 /** ************************** Private Data Members ***************************/
317
318 /** Is the configuration file currently being read in one of the infamous G2.39 ones. */
319 private boolean is_twopointthreenine = false;
320 private File collect_config_file;
321
322 /** ************************** Public Methods ***************************/
323
324 public CollectionConfiguration(File collect_config_file) {
325 this.self = this;
326 this.collect_config_file = collect_config_file;
327 // If collect_cfg is xml we can load it straight away
328 String collect_config_name = collect_config_file.getName();
329 if(collect_config_name.equals(COLLECTCONFIGURATION_XML)) {
330 // Parse with Utility but don't use class loader
331 document = Utility.parse(collect_config_file, false);
332 }
333 // Otherwise if this is a legacy collect.cfg file then read in the template and send to magic parser
334 else if(collect_config_name.equals(COLLECT_CFG)) {
335 document = Utility.parse(PSEUDO_COLLECTCONFIGURATION_XML, true);
336 parse(collect_config_file);
337 }
338 }
339
340 /** This debug facility shows the currently loaded collect.cfg or CollectConfig.xml file as a DOM tree. */
341 public void display() {
342 JDialog dialog = new JDialog(Gatherer.g_man, "Collection Configuration", false);
343 dialog.setSize(400,400);
344 JPanel content_pane = (JPanel) dialog.getContentPane();
345 final DOMTree tree = new DOMTree(document);
346 JButton refresh_button = new GLIButton("Refresh Tree");
347 refresh_button.setMnemonic(KeyEvent.VK_R);
348 refresh_button.addActionListener(new ActionListener() {
349 public void actionPerformed(ActionEvent event) {
350 tree.setDocument(document);
351 }
352 });
353 content_pane.setBorder(BorderFactory.createEmptyBorder(5,5,5,5));
354 content_pane.setLayout(new BorderLayout());
355 content_pane.add(new JScrollPane(tree), BorderLayout.CENTER);
356 content_pane.add(refresh_button, BorderLayout.SOUTH);
357 dialog.show();
358 }
359
360// public Element getBeta() {
361// Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_BETA_ELEMENT, null, null);
362// element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_BETA_STR);
363// element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
364// return element;
365// }
366
367 public Element getCreator() {
368 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_CREATOR_ELEMENT, null, null);
369 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_CREATOR_STR);
370 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
371 return element;
372 }
373
374 public Element getDocumentElement() {
375 return document.getDocumentElement();
376 }
377
378 public File getFile() {
379 return collect_config_file;
380 }
381
382 /** Retrieve or create the languages Element. */
383 public Element getLanguages() {
384 return getOrCreateElementByTagName(LANGUAGES_ELEMENT, null, null);
385 }
386
387 public Element getLevels() {
388 return getOrCreateElementByTagName(LEVELS_ELEMENT, null, null);
389 }
390
391 public Element getMaintainer() {
392 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_MAINTAINER_ELEMENT, null, null);
393 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_MAINTAINER_STR);
394 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
395 return element;
396 }
397
398 /** Retrieve or create the indexes Element. Note that this method behaves differently from the other getBlah methods, in that it also has to keep in mind that indexes come in two flavours, MG and MGPP. */
399 public Element getMGIndexes() {
400 return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, FALSE_STR);
401 }
402
403 public Element getMGPPIndexes() {
404 return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, TRUE_STR);
405 }
406
407 public Element getPublic() {
408 Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_PUBLIC_ELEMENT, null, null);
409 element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_PUBLIC_STR);
410 element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
411 return element;
412 }
413
414 /** Retrieve or create the searchtype element. */
415 public Element getSearchType() {
416 ///ystem.err.println("Get or create element by tag name: " + name);
417 Element document_element = document.getDocumentElement();
418 NodeList elements = document_element.getElementsByTagName(SEARCHTYPE_ELEMENT);
419 int elements_length = elements.getLength();
420 if(elements_length > 0) {
421 document_element = null;
422 return (Element) elements.item(0);
423 }
424 // Create the element
425 Element element = document.createElement(SEARCHTYPE_ELEMENT);
426 Node target_node = findInsertionPoint(element);
427 if(target_node != null) {
428 document_element.insertBefore(element, target_node);
429 }
430 else {
431 document_element.appendChild(element);
432 }
433 document_element = null;
434 // Append a default search type node - form
435 Element a_searchtype_element = CollectionDesignManager.collect_config.document.createElement(CollectionConfiguration.CONTENT_ELEMENT);
436 a_searchtype_element.setAttribute(CollectionConfiguration.NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]);
437 element.appendChild(a_searchtype_element);
438 return element;
439 }
440
441 /** Retrieve or create the subindexes Element. */
442 public Element getSubIndexes() {
443 return getOrCreateElementByTagName(SUBCOLLECTION_INDEXES_ELEMENT, null, null);
444 }
445
446 /** Retrieve or create the supercollections Element. */
447 public Element getSuperCollection() {
448 return getOrCreateElementByTagName(SUPERCOLLECTION_ELEMENT, null, null);
449 }
450
451 public boolean ready() {
452 return document != null;
453 }
454
455 public void save() {
456 save(false);
457 }
458
459 public void save(boolean force_xml) {
460 if(collect_config_file.exists()) {
461 File original_file = new File(collect_config_file.getParentFile(), COLLECT_CFG);
462 File backup_file = new File(collect_config_file.getParentFile(), "collect.bak");
463 if(backup_file.exists()) {
464 backup_file.delete();
465 }
466 if(!original_file.renameTo(backup_file)) {
467 DebugStream.println("Can't rename collect.cfg");
468 }
469 }
470 if(force_xml || collect_config_file.getName().equals(COLLECTCONFIGURATION_XML)) {
471 ///ystem.err.println("Writing XML");
472 Utility.export(document, new File(collect_config_file.getParentFile(), COLLECTCONFIGURATION_XML));
473 }
474 else {
475 ///ystem.err.println("Writing text");
476 try {
477 OutputStream ostream = new FileOutputStream(collect_config_file);
478 Writer file_writer = new OutputStreamWriter(ostream, ENCODING);
479 //FileWriter file_writer = new FileWriter(collect_config_file, false);
480 BufferedWriter buffered_writer = new BufferedWriter(file_writer);
481 // In order to write out an old style collect.cfg we have to traverse the model and do several 'cute' tricks to ensure the collect.cfg is valid (for instance while every metadata element has a language attribute, only second or subsequent metadata, for a certain name, needs a language argument - hence the known metadata array. Note that within GLI the language will always be shown, but it doesn't crash and burn like G2 does, te-he).
482 // is this still true?? now we are writing all metadata with a lang tag. can we get rid of known_metadata??
483 // Yes we can. Lets see..
484 // known_metadata = new ArrayList(); 'blamo'
485 Element collect_config_element = document.getDocumentElement();
486 NodeList command_elements = collect_config_element.getChildNodes();
487 boolean just_wrote_newline = false; // Prevent two or more newlines in a row
488 for(int i = 0; i < command_elements.getLength(); i++) {
489 Node command_node = command_elements.item(i);
490 if(command_node instanceof Element) {
491 Element command_element = (Element) command_node;
492 // The only thing left are NewLine elements
493 if(command_element.getNodeName().equals(NEWLINE_ELEMENT) && !just_wrote_newline) {
494 buffered_writer.newLine();
495 just_wrote_newline = true;
496 }
497 // Anything else we write to file, but only if it has been assigned, the exception being the Indexes element which just get commented if unassigned (a side effect of MG && MGPP compatibility)
498 else if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR) || command_element.getNodeName().equals(INDEXES_ELEMENT) || command_element.getNodeName().equals(INDEX_DEFAULT_ELEMENT) || command_element.getNodeName().equals(LEVELS_ELEMENT)){
499 String command;
500 // format statements we write out with ex. still present
501 if (command_element.getNodeName().equals(FORMAT_ELEMENT)) {
502 command = toString(command_element, true);
503 } else {
504 command = toString(command_element, false);
505 }
506 if(command != null && command.length() > 0) {
507 write(buffered_writer, command);
508 buffered_writer.newLine();
509 just_wrote_newline = false;
510 }
511 }
512 }
513 }
514 buffered_writer.close();
515 // known_metadata = null; 'poof'
516 }
517 catch (Exception exception) {
518 DebugStream.println("Error in CollectionConfiguration.save(boolean): " + exception);
519 DebugStream.printStackTrace(exception);
520 }
521 }
522 }
523
524 /** ************************** Private Methods ***************************/
525
526 private String classifyToString(Element command_element, boolean show_extracted_namespace)
527 {
528 StringBuffer text = new StringBuffer(CLASSIFY_STR);
529 text.append(TAB_CHARACTER);
530 text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
531 text.append(SPACE_CHARACTER);
532 NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
533 int option_elements_length = option_elements.getLength();
534 for(int j = 0; j < option_elements_length; j++) {
535 Element option_element = (Element) option_elements.item(j);
536 if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
537 text.append(StaticStrings.MINUS_CHARACTER);
538 text.append(option_element.getAttribute(NAME_ATTRIBUTE));
539 String value_str = XMLTools.getValue(option_element);
540
541 // Remove the extracted metadata namespaces if required
542 if (value_str.length() > 0) {
543 StringTokenizer string_tokenizer = new StringTokenizer(value_str, ",");
544 value_str = "";
545 while (string_tokenizer.hasMoreElements()) {
546 String token = (String) string_tokenizer.nextElement();
547 MetadataElement metadata_element = MetadataTools.getMetadataElementWithDisplayName(token);
548 if (metadata_element != null) {
549 token = metadata_element.getFullName();
550 }
551
552 if (token.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
553 token = token.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
554 }
555
556 value_str = value_str + token;
557 if (string_tokenizer.hasMoreElements()) {
558 value_str = value_str + ",";
559 }
560 }
561 }
562
563 text.append(SPACE_CHARACTER);
564 if (value_str.indexOf(SPACE_CHARACTER) == -1) {
565 text.append(value_str);
566 }
567 else {
568 text.append(SPEECH_CHARACTER);
569 text.append(value_str);
570 text.append(SPEECH_CHARACTER);
571 }
572 value_str = null;
573 if(j < option_elements_length - 1) {
574 text.append(SPACE_CHARACTER);
575 }
576 }
577 option_element = null;
578 }
579 option_elements = null;
580 return text.toString();
581 }
582
583 private String formatToString(Element command_element, boolean show_extracted_namespace) {
584 StringBuffer text = new StringBuffer(FORMAT_STR);
585 text.append(SPACE_CHARACTER);
586 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
587 text.append(SPACE_CHARACTER);
588 String value_str = command_element.getAttribute(VALUE_ATTRIBUTE);
589 if(value_str.length() != 0) {
590 text.append(value_str);
591 }
592 else {
593 // Remember to encode format string to Greenstone specification
594 value_str = Codec.transform(XMLTools.getValue(command_element), Codec.DOM_TO_GREENSTONE);
595 // Remove any references to a namespace for extracted metadata
596 if (!show_extracted_namespace) {
597 String match_string = "\\[" + Utility.EXTRACTED_METADATA_NAMESPACE + "\\.";
598 value_str = value_str.replaceAll(match_string, "[");
599 }
600
601 text.append(SPEECH_CHARACTER);
602 text.append(value_str);
603 text.append(SPEECH_CHARACTER);
604 }
605 value_str = null;
606 return text.toString();
607 }
608
609 /** Retrieve or create the indexes Element. */
610 private Element getOrCreateElementByTagName(String name, String conditional_attribute, String required_value) {
611 Element document_element = document.getDocumentElement();
612 NodeList elements = document_element.getElementsByTagName(name);
613 int elements_length = elements.getLength();
614 if(elements_length > 0) {
615 if(conditional_attribute == null) {
616 document_element = null;
617 return (Element) elements.item(0);
618 }
619 else {
620 for(int i = 0; i < elements_length; i++) {
621 Element element = (Element) elements.item(i);
622 if(element.getAttribute(conditional_attribute).equals(required_value)) {
623 document_element = null;
624 return element;
625 }
626 element = null;
627 }
628 }
629 }
630 // Create the element
631 Element element = document.createElement(name);
632 // If there was a property set it
633 if(conditional_attribute != null) {
634 element.setAttribute(conditional_attribute, required_value);
635 }
636 Node target_node = findInsertionPoint(element);
637 if(target_node != null) {
638 document_element.insertBefore(element, target_node);
639 }
640 else {
641 document_element.appendChild(element);
642 }
643 document_element = null;
644 return element;
645 }
646
647 private String indexesToString(Element command_element, boolean show_extracted_namespace) {
648 boolean comment_only = false;
649 StringBuffer text = new StringBuffer("");
650 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
651 text.append("#");
652 comment_only = true;
653 }
654 text.append(INDEX_STR);
655 text.append(TAB_CHARACTER);
656 if(!comment_only) {
657 text.append(TAB_CHARACTER);
658 }
659 NodeList index_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
660 if (index_elements.getLength() == 0) { // no indexes
661 return "";
662 }
663 // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
664 int index_elements_length = index_elements.getLength();
665 for(int j = 0; j < index_elements_length; j++) {
666 Element index_element = (Element) index_elements.item(j);
667 String level_str = index_element.getAttribute(LEVEL_ATTRIBUTE);
668 if(level_str.length() > 0) {
669 text.append(level_str);
670 text.append(StaticStrings.COLON_CHARACTER);
671 }
672 NodeList content_elements = index_element.getElementsByTagName(CONTENT_ELEMENT);
673 int content_elements_length = content_elements.getLength();
674 // Don't output anything if no indexes are set
675 if(content_elements_length == 0) {
676 return null;
677 }
678 for(int k = 0; k < content_elements_length; k++) {
679 Element content_element = (Element) content_elements.item(k);
680 String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
681 if(!show_extracted_namespace && name_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
682 name_str = name_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
683 }
684 text.append(name_str);
685 name_str = null;
686 if(k < content_elements_length - 1) {
687 text.append(StaticStrings.COMMA_CHARACTER);
688 }
689 content_element = null;
690 }
691 if(j < index_elements_length - 1) {
692 text.append(SPACE_CHARACTER);
693 }
694 content_elements = null;
695 index_element = null;
696 }
697 index_elements = null;
698 return text.toString();
699 }
700
701 private String indexDefaultToString(Element command_element, boolean show_extracted_namespace) {
702 StringBuffer text = new StringBuffer("");
703 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
704 text.append("#");
705 }
706 text.append(INDEX_DEFAULT_STR);
707 text.append(TAB_CHARACTER);
708 text.append(command_element.getAttribute(LEVEL_ATTRIBUTE));
709 text.append(StaticStrings.COLON_CHARACTER);
710 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
711 int content_elements_length = content_elements.getLength();
712 for(int j = 0; j < content_elements_length; j++) {
713 Element content_element = (Element) content_elements.item(j);
714 String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
715 if(!show_extracted_namespace && name_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
716 name_str = name_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
717 }
718 text.append(name_str);
719 name_str = null;
720 if(j < content_elements_length - 1) {
721 text.append(StaticStrings.COMMA_CHARACTER);
722 }
723 content_element = null;
724 }
725 content_elements = null;
726 return text.toString();
727 }
728
729 private String languagesToString(Element command_element) {
730 StringBuffer text = new StringBuffer(LANGUAGES_STR);
731 text.append(TAB_CHARACTER);
732 // Retrieve all the languages and write them out in a space separated list
733 NodeList language_elements = command_element.getElementsByTagName(LANGUAGE_ELEMENT);
734 int language_elements_length = language_elements.getLength();
735 if(language_elements_length == 0) {
736 return null;
737 }
738 for(int j = 0; j < language_elements_length; j++) {
739 Element language_element = (Element) language_elements.item(j);
740 text.append(language_element.getAttribute(NAME_ATTRIBUTE));
741 if(j < language_elements_length - 1) {
742 text.append(SPACE_CHARACTER);
743 }
744 }
745 return text.toString();
746 }
747
748 private String languageDefaultToString(Element command_element) {
749 StringBuffer text = new StringBuffer(LANGUAGE_DEFAULT_STR);
750 text.append(TAB_CHARACTER);
751 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
752 return text.toString();
753 }
754
755 private String levelsToString(Element command_element) {
756 StringBuffer text = new StringBuffer("");
757 if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
758 text.append("#");
759 }
760 text.append(LEVELS_STR);
761 text.append(TAB_CHARACTER);
762 text.append(TAB_CHARACTER);
763 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
764 int content_elements_length = content_elements.getLength();
765 // Don't output anything if no levels are set.
766 if(content_elements_length == 0) {
767 return null;
768 }
769 for(int i = 0; i < content_elements_length; i++) {
770 Element content_element = (Element) content_elements.item(i);
771 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
772 text.append(SPACE_CHARACTER);
773 }
774 return text.substring(0, text.length() - 1);
775 }
776
777 static public String metadataToString(Element command_element, boolean text_value) {
778 boolean special = false;
779 // If there is no value attribute, then we don't write anything
780 StringBuffer text = new StringBuffer("");
781 String name_str = command_element.getAttribute(NAME_ATTRIBUTE);
782 // If the name is one of the special four, we don't write the collectionmeta first. Note the maintainer collectionmeta is singled out for 'prittying' reasons.
783 if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
784 text.append(name_str);
785 text.append(TAB_CHARACTER);
786 special = true;
787 }
788 else if (/* name_str.equals(COLLECTIONMETADATA_BETA_STR) || */ name_str.equals(COLLECTIONMETADATA_CREATOR_STR) || name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
789 text.append(name_str);
790 text.append(TAB_CHARACTER);
791 text.append(TAB_CHARACTER);
792 special = true;
793 }
794 else {
795 text.append(COLLECTIONMETADATA_STR);
796 text.append(TAB_CHARACTER);
797 text.append(name_str);
798 text.append(SPACE_CHARACTER);
799 String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE);
800 // If this is element is in english, and it is the first one found, we don't need to write the language argument.
801 //if(!language_str.equals(ENGLISH_LANGUAGE_STR) || known_metadata == null || known_metadata.contains(name_str)) {
802 // changed so that we always write the language string
803 text.append(LBRACKET_CHARACTER);
804 text.append(LANGUAGE_ARGUMENT);
805 text.append(language_str);
806 text.append(RBRACKET_CHARACTER);
807 text.append(SPACE_CHARACTER);
808 //}
809 if(known_metadata != null) {
810 known_metadata.add(name_str);
811 }
812 language_str = null;
813 }
814 name_str = null;
815
816 String value_str = XMLTools.getValue(command_element);
817 // The value string we retrieved will be encoded for xml, so we now decode it - to text if text_value set. This parameter was originally show_extracted_namespace, but sincethis is only true for 'toString()' commands from within the CDM, its good enough to determine if this toString() will be used to display on screen, or write to collect.cfg
818 if(text_value == CollectionMeta.TEXT) {
819 value_str = Codec.transform(value_str, Codec.DOM_TO_TEXT);
820 }
821 else {
822 value_str = Codec.transform(value_str, Codec.DOM_TO_GREENSTONE);
823 }
824
825 // We don't wrap the email addresses in quotes, nor the other special metadata
826 if(special) {
827 text.append(value_str);
828 }
829 else {
830 text.append(SPEECH_CHARACTER);
831 text.append(value_str);
832 text.append(SPEECH_CHARACTER);
833 }
834 value_str = null;
835 return text.toString();
836 }
837
838 /** Parse a collect.cfg into a DOM model representation. */
839 private void parse(File collect_config_file) {
840 try {
841 ArrayList acquired_collectionmeta_names = null;
842 ArrayList obsolete_collectionmeta_names = null;
843 HashMap changed_collectionmeta_names = null;
844
845 // Life is made oh-so-more tricky by the existance of G2.39 config files. There are two ways to handle them:
846 // 1. Notice that the file is G2.39 from the start, then as I parse it magic it into G2.4 standard
847 // 2. Extend the parsing method to handle reading in G2.39, then afterwards go through the DOM changing it to G2.4 as appropriate.
848 // As far as I can tell the second option is twice as much work, so I'll try option 1. The problem here is that I have to determine if the 'buildtype' command is somewhere in the collect.cfg file, which means I'm going to have to read the file twice - once seaching for 'buildtype' and the second time to parse it.
849
850 // Search for 'buildtype mgpp'
851 InputStream input_stream_one = new FileInputStream(collect_config_file);
852 Reader reader_one = new InputStreamReader(input_stream_one, ENCODING);
853 BufferedReader buffered_reader_one = new BufferedReader(reader_one);
854 String search_line_str = null;
855 while(!is_twopointthreenine && (search_line_str = buffered_reader_one.readLine()) != null) {
856 if(search_line_str.toLowerCase().indexOf(BUILDTYPE_STR) != -1) {
857 is_twopointthreenine = true;
858 acquired_collectionmeta_names = new ArrayList();
859 changed_collectionmeta_names = new HashMap();
860 obsolete_collectionmeta_names = new ArrayList();
861 }
862 }
863 buffered_reader_one.close();
864 reader_one.close();
865 input_stream_one.close();
866 buffered_reader_one = null;
867 reader_one = null;
868 input_stream_one = null;
869
870 Element collect_cfg_element = document.getDocumentElement();
871 // Read in the file one command at a time.
872 InputStream istream = new FileInputStream(collect_config_file);
873 Reader in_reader = new InputStreamReader(istream, ENCODING);
874 BufferedReader in = new BufferedReader(in_reader);
875 String command_str = null;
876 while((command_str = in.readLine()) != null) {
877 boolean append_element = true;
878 Element command_element = null;
879 // A command may be broken over several lines.
880 command_str = command_str.trim();
881 boolean eof = false;
882 while(!eof && command_str.endsWith(NEWLINE_CHARACTER)) {
883 String next_line = in.readLine();
884 if(next_line != null) {
885 next_line = next_line.trim();
886 if(next_line.length() > 0) {
887 // Remove the new line character
888 command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
889 // And append the next line, which due to the test above must be non-zero length
890 command_str = command_str + next_line;
891 }
892 next_line = null;
893 }
894 // If we've reached the end of the file theres nothing more we can do
895 else {
896 eof = true;
897 }
898 }
899 // If there is still a new line character, then we remove it and hope for the best
900 if(command_str.endsWith(NEWLINE_CHARACTER)) {
901 command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
902 }
903 // Now we've either got a command to parse...
904 if(command_str.length() != 0) {
905 // Start trying to figure out what it is
906 //StringTokenizer tokenizer = new StringTokenizer(command_str);
907 // Instead of a standard string tokenizer I'm going to use the new version of CommandTokenizer, which is not only smart enough to correctly notice speech marks and correctly parse them out, but now also takes the input stream so it can rebuild tokens that stretch over several lines.
908 CommandTokenizer tokenizer = new CommandTokenizer(command_str, in);
909 String command_type = tokenizer.nextToken().toLowerCase();
910 // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
911 if(command_element == null && command_type.equals(BUILDTYPE_STR)) {
912 DebugStream.println("G2.39 Buildtype command detected. Ignoring.");
913 command_element = document.createElement(UNKNOWN_ELEMENT);
914 append_element = false;
915 }
916 if(command_element == null && command_type.equals(CLASSIFY_STR)) {
917 command_element = parseClassify(command_str);
918 }
919 if(command_element == null && command_type.equals(FORMAT_STR)) {
920 command_element = parseFormat(tokenizer); // Revised to handle multiple lines
921 }
922 if(command_element == null && (command_type.equals(INDEX_STR) || command_type.equals(COMMENTED_INDEXES_STR))) {
923 // If this was a G2.39 config file then we manipulate the command string a bit before we submit it to the parser. We start by adding allfields as the first index. We then space separate the remaining indexes, and remove duplicates when encountered. Of course before we do any of that we record the various space separated indexes so that we can remove the collection meta assigned to them.
924 if(is_twopointthreenine) {
925 DebugStream.println("G2.39 Index command detected. Modifying.");
926 DebugStream.println("Before: " + command_str);
927 StringBuffer new_command_str = new StringBuffer(command_type);
928 new_command_str.append(SPACE_CHARACTER);
929 new_command_str.append(ALLFIELDS_STR);
930 new_command_str.append(SPACE_CHARACTER);
931 ArrayList known_indexes = new ArrayList();
932 while(tokenizer.hasMoreTokens()) {
933 String old_index_str = tokenizer.nextToken();
934 // If this index is a combination of sources, then we need to remove the old collectionmeta, split up the compound index, then request new metadata be added for each part
935 if(old_index_str.indexOf(COMMA_CHARACTER) != -1) {
936 obsolete_collectionmeta_names.add(STOP_CHARACTER + old_index_str);
937 StringTokenizer string_tokenizer = new StringTokenizer(old_index_str, COMMA_CHARACTER);
938 while(string_tokenizer.hasMoreTokens()) {
939 String index_fragment_str = string_tokenizer.nextToken();
940 if(!known_indexes.contains(index_fragment_str)) {
941 known_indexes.add(index_fragment_str);
942 new_command_str.append(index_fragment_str);
943 new_command_str.append(SPACE_CHARACTER);
944 acquired_collectionmeta_names.add(STOP_CHARACTER + index_fragment_str);
945 }
946 index_fragment_str = null;
947 }
948 string_tokenizer = null;
949 }
950 // However if this was just a single index then a little choir of angels sing haleluja because we don't have to do -anything-. Nothing at all. Zip. Well no changes anyway. I obviously had to write this comment, and you can probably see, assuming you are not blind, that there are several lines of code below doing something, which is of course not nothing but something. And if we assume you are blind then you probably can't see the code, but then you probably didn't not see it doing the not nothing I said it would above.
951 else {
952 if(!known_indexes.contains(old_index_str)) {
953 known_indexes.add(old_index_str);
954 new_command_str.append(old_index_str);
955 new_command_str.append(SPACE_CHARACTER);
956 }
957 else {
958 // Use the collectionmeta for the single index instead of generating a default one
959 acquired_collectionmeta_names.remove(STOP_CHARACTER + old_index_str);
960 }
961 }
962 old_index_str = null;
963 }
964 known_indexes = null;
965 command_str = new_command_str.toString();
966 new_command_str = null;
967 DebugStream.println("After: " + command_str);
968 }
969 command_element = parseIndex(command_str);
970 }
971 if(command_element == null && (command_type.equals(INDEX_DEFAULT_STR) || command_type.equals(COMMENTED_INDEX_DEFAULT_STR))) {
972
973 command_element = parseIndexDefault(command_str);
974 // If this was a G2.39 config file then we squelch the default index (no such thing in G2.4)
975 if(is_twopointthreenine) {
976 DebugStream.println("G2.39 Default Index command detected. Ignoring.");
977 append_element = false;
978 }
979 }
980 if(command_element == null && command_type.equals(LANGUAGES_STR)) {
981 command_element = parseLanguage(command_str);
982 }
983 if(command_element == null && command_type.equals(LANGUAGE_DEFAULT_STR)) {
984 command_element = parseLanguageDefault(command_str);
985 }
986 if(command_element == null && (command_type.equals(LEVELS_STR) || command_type.equals(COMMENTED_LEVELS_STR))) {
987 // Again if this is G2.39 we have to do a tiny bit of magic to the levels command. We need to add document level, and change the remainder to lower case.
988 if(is_twopointthreenine) {
989 DebugStream.println("G2.39 Levels command detected. Modifying.");
990 DebugStream.println("Before: " + command_str);
991 StringBuffer new_command_str = new StringBuffer(command_type);
992 new_command_str.append(SPACE_CHARACTER);
993 new_command_str.append(DOCUMENT_STR);
994 while(tokenizer.hasMoreTokens()) {
995 String token = tokenizer.nextToken();
996 // Generate a lower case version
997 String token_lc = token.toLowerCase();
998 // If they are still the same then it is all good baby, otherwise we have to remember to transform their collectionmeta as well
999 if(!token.equals(token_lc)) {
1000 changed_collectionmeta_names.put(STOP_CHARACTER + token, STOP_CHARACTER + token_lc);
1001 }
1002 new_command_str.append(SPACE_CHARACTER);
1003 new_command_str.append(token_lc);
1004 token_lc = null;
1005 token = null;
1006 }
1007 command_str = new_command_str.toString();
1008 new_command_str = null;
1009 DebugStream.println("After: " + command_str);
1010 }
1011 command_element = parseLevels(command_str);
1012 }
1013 if(command_element == null && command_type.equals(COLLECTIONMETADATA_STR)) {
1014 command_element = parseMetadata(tokenizer); // Revised to handle multiple lines
1015 }
1016 if(command_element == null && (/* command_type.equals(COLLECTIONMETADATA_BETA_STR) || */ command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals(COLLECTIONMETADATA_CREATOR_STR) || command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) {
1017 command_element = parseMetadataSpecial(command_str);
1018 }
1019 if(command_element == null && command_type.equals(PLUGIN_STR)) {
1020 command_element = parsePlugIn(command_str);
1021 }
1022 if(command_element == null && command_type.equals(SEARCHTYPE_STR)) {
1023 command_element = parseSearchType(command_str);
1024 }
1025 if(command_element == null && command_type.equals(SUBCOLLECTION_STR)) {
1026 command_element = parseSubCollection(command_str);
1027 }
1028 if(command_element == null && command_type.equals(SUBCOLLECTION_DEFAULT_INDEX_STR)) {
1029 command_element = parseSubCollectionDefaultIndex(command_str);
1030 }
1031 if(command_element == null && command_type.equals(SUBCOLLECTION_INDEX_STR)) {
1032 command_element = parseSubCollectionIndex(command_str);
1033 }
1034 if(command_element == null && (command_type.equals(SUPERCOLLECTION_STR) || command_type.equals(CCS_STR))) {
1035 command_element = parseSuperCollection(command_str);
1036 }
1037 // Doesn't match any known type
1038 command_type = null;
1039 if(command_element == null) {
1040 // No-one knows what to do with this command, so we create an Unknown command element
1041 command_element = document.createElement(UNKNOWN_ELEMENT);
1042 XMLTools.setValue(command_element, command_str);
1043 }
1044 }
1045 // Or an empty line to remember for later
1046 else {
1047 command_element = document.createElement(NEWLINE_ELEMENT);
1048 }
1049 // Now command element shouldn't be null so we append it to the collection config DOM, but only if we haven't been told not to add it
1050 if(append_element) {
1051 collect_cfg_element.appendChild(command_element);
1052 }
1053 }
1054
1055 // We have completed parsing the collect configuration file. Now, if we are dealing with the G2.39 nightmare scenario, it's time to add the SearchType command and modify the collectionmeta commands as necessary.
1056 if(is_twopointthreenine) {
1057 Element search_type_element = getSearchType();
1058 search_type_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1059 while(search_type_element.hasChildNodes()) {
1060 search_type_element.removeChild(search_type_element.getFirstChild());
1061 }
1062 Element plain_search_type_element = document.createElement(CONTENT_ELEMENT);
1063 plain_search_type_element.setAttribute(NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[1]);
1064 search_type_element.appendChild(plain_search_type_element);
1065 plain_search_type_element = null;
1066 Element form_search_type_element = document.createElement(CONTENT_ELEMENT);
1067 form_search_type_element.setAttribute(NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]);
1068 search_type_element.appendChild(form_search_type_element);
1069 form_search_type_element = null;
1070 search_type_element = null;
1071
1072 // Search through the existing collectionmeta
1073 Element document_element = document.getDocumentElement();
1074 NodeList collectionmeta_elements = document_element.getElementsByTagName(COLLECTIONMETADATA_ELEMENT);
1075 DebugStream.println("There are " + obsolete_collectionmeta_names.size() + " collectionmeta to remove.");
1076 DebugStream.println("There are " + changed_collectionmeta_names.size() + " collectionmeta to change.");
1077 for(int z = collectionmeta_elements.getLength(); z > 0; z--) {
1078 Element collectionmeta_element = (Element) collectionmeta_elements.item(z - 1);
1079 String name = collectionmeta_element.getAttribute(NAME_ATTRIBUTE);
1080 DebugStream.println("Checking " + name);
1081 // Remove any obsolete metadata
1082 if(obsolete_collectionmeta_names.contains(name)) {
1083 DebugStream.println("G2.39 CollectMeta detected. Removing: " + name);
1084 document_element.removeChild(collectionmeta_element);
1085 }
1086 // We may have been asked to change the index name to lower case
1087 else if(changed_collectionmeta_names.containsKey(name)) {
1088 String new_name = (String) changed_collectionmeta_names.get(name);
1089 DebugStream.println("G2.39 CollectMeta detected. Changing: " + name + " -> " + new_name);
1090 collectionmeta_element.setAttribute(NAME_ATTRIBUTE, new_name);
1091 new_name = null;
1092 }
1093 name = null;
1094 }
1095
1096 // Finally add any newly acquired collectionmeta. This general defaults to the collectionmeta name less the full stop
1097 DebugStream.println("There are " + acquired_collectionmeta_names.size() + " collectionmeta to add.");
1098 for(int y = 0; y < acquired_collectionmeta_names.size(); y++) {
1099 String name = (String) acquired_collectionmeta_names.get(y);
1100 String value = name.substring(1);
1101 DebugStream.println("G2.39 CollectMeta missing. Adding: " + name + " [l=" + Configuration.getLanguage() + "] \"" + value + "\"");
1102 Element element = document.createElement(COLLECTIONMETADATA_ELEMENT);
1103 element.setAttribute(NAME_ATTRIBUTE, name);
1104 element.setAttribute(LANGUAGE_ATTRIBUTE, Configuration.getLanguage());
1105 element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1106 XMLTools.setValue(element, value);
1107 document_element.appendChild(element);
1108 element = null;
1109 value = null;
1110 name = null;
1111 }
1112
1113 document_element = null;
1114 }
1115 }
1116 catch(Exception exception) {
1117 DebugStream.println("Error in CollectionConfiguration.parse(java.io.File): " + exception);
1118 DebugStream.printStackTrace(exception);
1119 }
1120 }
1121
1122 private Element parseClassify(String command_str) {
1123 Element command_element = null;
1124 try {
1125 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1126 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1127 if(tokenizer.countTokens() >= 2) { // Must support "classify Phind" (no args)
1128 command_element = document.createElement(CLASSIFY_ELEMENT);
1129 // First token is classify
1130 tokenizer.nextToken();
1131 // The next token is the classifier type
1132 command_element.setAttribute(TYPE_ATTRIBUTE, tokenizer.nextToken());
1133 // Now we parse out the remaining arguments into a hashmapping from name to value
1134 HashMap arguments = parseArguments(tokenizer);
1135 // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
1136 Iterator names = arguments.keySet().iterator();
1137 while(names.hasNext()) {
1138 String name = (String) names.next();
1139 String value = (String) arguments.get(name); // Can be null
1140 // The metadata argument gets added as the content attribute
1141 if (name.equals(METADATA_ARGUMENT) && value != null) {
1142 // Add the extracted namespace onto un-namespaced metadata names
1143 StringTokenizer string_tokenizer = new StringTokenizer(value, ",");
1144 value = "";
1145 while (string_tokenizer.hasMoreElements()) {
1146 String token = (String) string_tokenizer.nextElement();
1147
1148 if (token.indexOf(StaticStrings.NS_SEP) == -1) {
1149 token = StaticStrings.EXTRACTED_NAMESPACE + token;
1150 }
1151 else {
1152 MetadataElement metadata_element = MetadataTools.getMetadataElementWithName(token);
1153 if (metadata_element != null) {
1154 token = metadata_element.getDisplayName();
1155 }
1156 }
1157
1158 if (!value.equals("")) {
1159 value = value + ",";
1160 }
1161 value = value + token;
1162 }
1163 }
1164 // Everything else is an Option Element
1165 Element option_element = document.createElement(OPTION_ELEMENT);
1166 option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
1167 if(value != null) {
1168 // Remove any speech marks appended in strings containing whitespace
1169 if(value.startsWith(SPEECH_CHARACTER) && value.endsWith(SPEECH_CHARACTER)) {
1170 value = value.substring(1, value.length() - 1);
1171 }
1172 XMLTools.setValue(option_element, value);
1173 }
1174 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1175 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
1176 command_element.appendChild(option_element);
1177 option_element = null;
1178 name = null;
1179 value = null;
1180 }
1181 names = null;
1182 arguments = null;
1183 }
1184 tokenizer = null;
1185 }
1186 catch(Exception error) {
1187 }
1188 return command_element;
1189 }
1190
1191 private Element parseFormat(CommandTokenizer tokenizer) {
1192 Element command_element = null;
1193 try {
1194 command_element = document.createElement(FORMAT_ELEMENT);
1195 String name_str = tokenizer.nextToken();
1196 String value_str = tokenizer.nextToken();
1197 if(name_str != null && value_str != null) {
1198 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1199 // If the value is true or false we add it as an attribute
1200 if(value_str.equalsIgnoreCase(TRUE_STR) || value_str.equalsIgnoreCase(FALSE_STR)) {
1201 command_element.setAttribute(VALUE_ATTRIBUTE, value_str.toLowerCase());
1202 }
1203 // Otherwise it gets added as a text node
1204 else {
1205 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
1206 value_str = Codec.transform(value_str, Codec.GREENSTONE_TO_DOM);
1207 XMLTools.setValue(command_element, value_str);
1208 }
1209 }
1210 else {
1211 command_element = null;
1212 }
1213 name_str = null;
1214 value_str = null;
1215 }
1216 catch (Exception exception) {
1217 DebugStream.printStackTrace(exception);
1218 command_element = null;
1219 }
1220 return command_element;
1221 }
1222
1223 private Element parseIndex(String command_str) {
1224 Element command_element = null;
1225 try {
1226 StringTokenizer tokenizer = new StringTokenizer(command_str);
1227 String command = tokenizer.nextToken();
1228 command_element = document.createElement(INDEXES_ELEMENT);
1229 command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(INDEX_STR) ? TRUE_STR : FALSE_STR));
1230 command = null;
1231 if(!tokenizer.hasMoreTokens()) {
1232
1233 // there are no indexes
1234 command_element.setAttribute(ASSIGNED_ATTRIBUTE, FALSE_STR);
1235 command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR); // for now
1236 tokenizer = null;
1237 return command_element;
1238 }
1239
1240 while(tokenizer.hasMoreTokens()) {
1241 Element index_element = document.createElement(INDEX_ELEMENT);
1242 String index_str = tokenizer.nextToken();
1243 // There are two types of index we have to consider. Old G2.38 and earlier use level:source tuplets while G2.39+ have just a single, non-comma separated list where order is important.
1244 boolean old_index;
1245 if(index_str.indexOf(COLON_CHARACTER) != -1) {
1246 old_index = true;
1247 index_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
1248 index_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
1249 command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR);
1250 }
1251 else {
1252 command_element.setAttribute(MGPP_ATTRIBUTE, TRUE_STR);
1253 old_index = false;
1254 }
1255 StringTokenizer content_tokenizer = new StringTokenizer(index_str, StaticStrings.COMMA_CHARACTER);
1256 while(content_tokenizer.hasMoreTokens()) {
1257 Element content_element = document.createElement(CONTENT_ELEMENT);
1258 String content_str = content_tokenizer.nextToken();
1259 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1260 if(content_str.indexOf(StaticStrings.NS_SEP) == -1) {
1261 if(content_str.equals(StaticStrings.TEXT_STR) || (!old_index && content_str.equals(StaticStrings.ALLFIELDS_STR))) {
1262 // Our special strings are OK.
1263 }
1264 else {
1265 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
1266 }
1267 }
1268 content_element.setAttribute(NAME_ATTRIBUTE, content_str);
1269 index_element.appendChild(content_element);
1270 content_element = null;
1271 }
1272 content_tokenizer = null;
1273 index_str = null;
1274 command_element.appendChild(index_element);
1275 index_element = null;
1276 }
1277 tokenizer = null;
1278 }
1279 catch (Exception exception) {
1280 exception.printStackTrace();
1281 }
1282 return command_element;
1283 }
1284
1285 private Element parseIndexDefault(String command_str) {
1286 Element command_element = null;
1287 try {
1288 StringTokenizer tokenizer = new StringTokenizer(command_str);
1289 if(tokenizer.countTokens() >= 2) {
1290 command_element = document.createElement(INDEX_DEFAULT_ELEMENT);
1291 command_element.setAttribute(ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken()).equals(INDEX_DEFAULT_STR) ? TRUE_STR : FALSE_STR));
1292 String index_str = tokenizer.nextToken();
1293 command_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
1294 String content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
1295 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1296 while(content_tokenizer.hasMoreTokens()) {
1297 Element content_element = document.createElement(CONTENT_ELEMENT);
1298 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1299 command_element.appendChild(content_element);
1300 content_element = null;
1301 }
1302 content_tokenizer = null;
1303 content_str = null;
1304 content_str = null;
1305 index_str = null;
1306 }
1307 tokenizer = null;
1308 }
1309 catch (Exception exception) {
1310 }
1311 return command_element;
1312 }
1313
1314 private Element parseLanguage(String command_str) {
1315 Element command_element = null;
1316 try {
1317 StringTokenizer tokenizer = new StringTokenizer(command_str);
1318 tokenizer.nextToken();
1319 if(tokenizer.hasMoreTokens()) {
1320 command_element = document.createElement(LANGUAGES_ELEMENT);
1321 while(tokenizer.hasMoreTokens()) {
1322 Element language_element = document.createElement(LANGUAGE_ELEMENT);
1323 language_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1324 command_element.appendChild(language_element);
1325 language_element = null;
1326 }
1327 }
1328 tokenizer = null;
1329 }
1330 catch (Exception exception) {
1331 }
1332 return command_element;
1333 }
1334
1335 private Element parseLanguageDefault(String command_str) {
1336 Element command_element = null;
1337 try {
1338 StringTokenizer tokenizer = new StringTokenizer(command_str);
1339 if(tokenizer.countTokens() >= 2) {
1340 command_element = document.createElement(LANGUAGE_DEFAULT_ELEMENT);
1341 tokenizer.nextToken();
1342 String default_language_str = tokenizer.nextToken();
1343 command_element.setAttribute(NAME_ATTRIBUTE, default_language_str);
1344 command_element.setAttribute(ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1345 default_language_str = null;
1346 }
1347 tokenizer = null;
1348 }
1349 catch (Exception exception) {
1350 }
1351 return command_element;
1352 }
1353
1354 private Element parseLevels(String command_str) {
1355 Element command_element = null;
1356 try {
1357 StringTokenizer tokenizer = new StringTokenizer(command_str);
1358 // First token is command type (levels)
1359 String command = tokenizer.nextToken();
1360 if(tokenizer.hasMoreTokens()) {
1361 command_element = document.createElement(LEVELS_ELEMENT);
1362 command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(LEVELS_STR) ? TRUE_STR : FALSE_STR));
1363 while(tokenizer.hasMoreTokens()) {
1364 Element level_element = document.createElement(CONTENT_ELEMENT);
1365 level_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1366 command_element.appendChild(level_element);
1367 level_element = null;
1368 }
1369 }
1370 command = null;
1371 }
1372 catch(Exception exception) {
1373 }
1374 return command_element;
1375 }
1376
1377 private Element parseMetadata(CommandTokenizer tokenizer) {
1378 Element command_element = null;
1379 try {
1380 command_element = document.createElement(COLLECTIONMETADATA_ELEMENT);
1381 String name_str = tokenizer.nextToken();
1382 String value_str = tokenizer.nextToken();
1383 if(name_str != null && value_str != null) {
1384 String language_str = Configuration.getLanguage();
1385 // Check if the value string is actually a language string
1386 if(value_str.startsWith(LBRACKET_CHARACTER) && value_str.endsWith(RBRACKET_CHARACTER)) {
1387 language_str = value_str.substring(value_str.indexOf(LANGUAGE_ARGUMENT) + 2, value_str.length() - 1);
1388 value_str = tokenizer.nextToken();
1389 }
1390 if(value_str != null) {
1391 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
1392 value_str = Codec.transform(value_str, Codec.GREENSTONE_TO_DOM);
1393 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1394 command_element.setAttribute(LANGUAGE_ATTRIBUTE, language_str);
1395 command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1396 XMLTools.setValue(command_element, value_str);
1397 }
1398 else {
1399 command_element = null;
1400 }
1401 language_str = null;
1402 }
1403 else {
1404 command_element = null;
1405 }
1406 name_str = null;
1407 value_str = null;
1408 }
1409 catch (Exception exception) {
1410 DebugStream.printStackTrace(exception);
1411 command_element = null;
1412 }
1413 return command_element;
1414 }
1415
1416 private Element parseMetadataSpecial(String command_str) {
1417 Element command_element = null;
1418 try {
1419 StringTokenizer tokenizer = new StringTokenizer(command_str);
1420 if(tokenizer.countTokens() >= 2) {
1421 String name_str = tokenizer.nextToken();
1422 String value_str = tokenizer.nextToken();
1423 // if(name_str.equals(COLLECTIONMETADATA_BETA_STR)) {
1424 // command_element = document.createElement(COLLECTIONMETADATA_BETA_ELEMENT);
1425 // }
1426 if (name_str.equals(COLLECTIONMETADATA_CREATOR_STR)) {
1427 command_element = document.createElement(COLLECTIONMETADATA_CREATOR_ELEMENT);
1428 }
1429 else if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
1430 command_element = document.createElement(COLLECTIONMETADATA_MAINTAINER_ELEMENT);
1431 }
1432 else if(name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
1433 command_element = document.createElement(COLLECTIONMETADATA_PUBLIC_ELEMENT);
1434 }
1435 if(command_element != null) {
1436 command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1437 command_element.setAttribute(LANGUAGE_ATTRIBUTE, ENGLISH_LANGUAGE_STR);
1438 command_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
1439 command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1440 if(value_str.startsWith(SPEECH_CHARACTER) && value_str.endsWith(SPEECH_CHARACTER)) {
1441 value_str = value_str.substring(1, value_str.length() - 1);
1442 }
1443 XMLTools.setValue(command_element, value_str);
1444 }
1445 value_str = null;
1446 name_str = null;
1447 }
1448 tokenizer = null;
1449 }
1450 catch (Exception exception) {
1451 }
1452 return command_element;
1453 }
1454
1455 private Element parsePlugIn(String command_str) {
1456 Element command_element = null;
1457 boolean use_metadata_files = false;
1458 try {
1459 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1460 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1461 if(tokenizer.countTokens() >= 2) {
1462 command_element = document.createElement(PLUGIN_ELEMENT);
1463 // First token is plugin
1464 tokenizer.nextToken();
1465 // The next token is the type
1466 String type = tokenizer.nextToken();
1467 command_element.setAttribute(TYPE_ATTRIBUTE, type);
1468 // Now we parse out the remaining arguments into a hashmapping from name to value
1469 HashMap arguments = parseArguments(tokenizer);
1470 // Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
1471 Iterator names = arguments.keySet().iterator();
1472 while(names.hasNext()) {
1473 String name = (String) names.next();
1474 String value = (String) arguments.get(name); // Can be null
1475 Element option_element = document.createElement(OPTION_ELEMENT);
1476 if(name.substring(1).equals(USE_METADATA_FILES_ARGUMENT)) {
1477 use_metadata_files = true;
1478 }
1479 option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
1480 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1481 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); // All arguments are considered to be custom until matched against base plugins arguments
1482 if(value != null) {
1483 // Remove any speech marks appended in strings containing whitespace
1484 if(value.startsWith(SPEECH_CHARACTER) && value.endsWith(SPEECH_CHARACTER)) {
1485 value = value.substring(1, value.length() - 1);
1486 }
1487 if(name.equals(METADATA_ARGUMENT)) {
1488 // The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace.
1489 if(value.indexOf(StaticStrings.NS_SEP) == -1) {
1490 value = StaticStrings.EXTRACTED_NAMESPACE + value;
1491 }
1492 }
1493 XMLTools.setValue(option_element, value);
1494 }
1495 command_element.appendChild(option_element);
1496 option_element = null;
1497 name = null;
1498 value = null;
1499 }
1500
1501 // We must have some RecPlug options: use_metadata_files, and show_progress
1502 if (type.equals(RECPLUG_STR)) {
1503 if (!use_metadata_files) {
1504 Element option_element = document.createElement(OPTION_ELEMENT);
1505 option_element.setAttribute(NAME_ATTRIBUTE, USE_METADATA_FILES_ARGUMENT);
1506 option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1507 option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
1508 command_element.appendChild(option_element);
1509 option_element = null;
1510 }
1511 }
1512 type = null;
1513 names = null;
1514 arguments = null;
1515 }
1516 tokenizer = null;
1517 }
1518 catch(Exception exception) {
1519 }
1520 return command_element;
1521 }
1522
1523 private Element parseSearchType(String command_str) {
1524 Element command_element = null;
1525 try {
1526 StringTokenizer tokenizer = new StringTokenizer(command_str);
1527 // First token is command type (levels)
1528 tokenizer.nextToken();
1529 if(tokenizer.hasMoreTokens()) {
1530 command_element = document.createElement(SEARCHTYPE_ELEMENT);
1531 command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1532 while(tokenizer.hasMoreTokens()) {
1533 Element search_element = document.createElement(CONTENT_ELEMENT);
1534 search_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1535 command_element.appendChild(search_element);
1536 search_element = null;
1537 }
1538 }
1539 }
1540 catch(Exception exception) {
1541 }
1542 return command_element;
1543 }
1544
1545 private Element parseSubCollection(String command_str) {
1546 Element command_element = null;
1547 try {
1548 CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1549 if(tokenizer.countTokens() >= 3) {
1550 command_element = document.createElement(SUBCOLLECTION_ELEMENT);
1551 // First token is command type
1552 tokenizer.nextToken();
1553 // Then subcollection identifier
1554 command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1555 // Then finally the pattern used to build the subcollection partition
1556 String full_pattern_str = tokenizer.nextToken();
1557 // Set inclusion/exclusion flag and remove any exclamation mark
1558 boolean exclusion = full_pattern_str.startsWith(EXCLAMATION_CHARACTER);
1559 if (exclusion) {
1560 full_pattern_str = full_pattern_str.substring(1, full_pattern_str.length());
1561 command_element.setAttribute(TYPE_ATTRIBUTE, EXCLUDE_STR);
1562 }
1563 else {
1564 command_element.setAttribute(TYPE_ATTRIBUTE, INCLUDE_STR);
1565 }
1566 StringTokenizer pattern_tokenizer = new StringTokenizer(full_pattern_str, SEPARATOR_CHARACTER);
1567 if(pattern_tokenizer.countTokens() >= 2) {
1568 String content_str = pattern_tokenizer.nextToken();
1569 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1570 if(!content_str.equals(StaticStrings.FILENAME_STR) && content_str.indexOf(StaticStrings.NS_SEP) == -1) {
1571 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
1572 }
1573 command_element.setAttribute(CONTENT_ATTRIBUTE, content_str);
1574 XMLTools.setValue(command_element, pattern_tokenizer.nextToken());
1575 if(pattern_tokenizer.hasMoreTokens()) {
1576 command_element.setAttribute(OPTIONS_ATTRIBUTE, pattern_tokenizer.nextToken());
1577 }
1578 }
1579 pattern_tokenizer = null;
1580 }
1581 }
1582 catch(Exception exception) {
1583 exception.printStackTrace();
1584 }
1585 return command_element;
1586 }
1587
1588 private Element parseSubCollectionDefaultIndex(String command_str) {
1589 Element command_element = null;
1590 try {
1591 StringTokenizer tokenizer = new StringTokenizer(command_str);
1592 if(tokenizer.countTokens() == 2) {
1593 command_element = document.createElement(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT);
1594 tokenizer.nextToken();
1595 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1596 String content_str = tokenizer.nextToken();
1597 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1598 while(content_tokenizer.hasMoreTokens()) {
1599 Element content_element = document.createElement(CONTENT_ELEMENT);
1600 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1601 command_element.appendChild(content_element);
1602 content_element = null;
1603 }
1604 content_tokenizer = null;
1605 content_str = null;
1606 }
1607 tokenizer = null;
1608 }
1609 catch(Exception exception) {
1610 }
1611 return command_element;
1612 }
1613
1614 private Element parseSubCollectionIndex(String command_str) {
1615 Element command_element = null;
1616 try {
1617 StringTokenizer tokenizer = new StringTokenizer(command_str);
1618 tokenizer.nextToken();
1619 if(tokenizer.hasMoreTokens()) {
1620 command_element = document.createElement(SUBCOLLECTION_INDEXES_ELEMENT);
1621 }
1622 while(tokenizer.hasMoreTokens()) {
1623 Element subcollectionindex_element = document.createElement(INDEX_ELEMENT);
1624 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1625 String content_str = tokenizer.nextToken();
1626 StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1627 while(content_tokenizer.hasMoreTokens()) {
1628 Element content_element = document.createElement(CONTENT_ELEMENT);
1629 content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1630 subcollectionindex_element.appendChild(content_element);
1631 content_element = null;
1632 }
1633 content_tokenizer = null;
1634 content_str = null;
1635 command_element.appendChild(subcollectionindex_element);
1636 subcollectionindex_element = null;
1637 }
1638 tokenizer = null;
1639 }
1640 catch (Exception exception) {
1641 }
1642 return command_element;
1643 }
1644
1645 private Element parseSuperCollection(String command_str) {
1646 Element command_element = null;
1647 try {
1648 StringTokenizer tokenizer = new StringTokenizer(command_str);
1649 if(tokenizer.countTokens() >= 3) {
1650 command_element = document.createElement(SUPERCOLLECTION_ELEMENT);
1651 tokenizer.nextToken();
1652 while(tokenizer.hasMoreTokens()) {
1653 Element collection_element = document.createElement(COLLECTION_ELEMENT);
1654 collection_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1655 command_element.appendChild(collection_element);
1656 collection_element = null;
1657 }
1658 }
1659 tokenizer = null;
1660 }
1661 catch(Exception exception) {
1662 }
1663 return command_element;
1664 }
1665
1666 private String pluginToString(Element command_element, boolean show_extracted_namespace) {
1667 StringBuffer text = new StringBuffer();
1668 if(!command_element.getAttribute(SEPARATOR_ATTRIBUTE).equals(TRUE_STR)) {
1669 text.append(PLUGIN_STR);
1670 text.append(TAB_CHARACTER);
1671 text.append(TAB_CHARACTER);
1672 text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
1673 // Retrieve, and output, the arguments
1674 NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
1675 int option_elements_length = option_elements.getLength();
1676 if(option_elements_length > 0) {
1677 text.append(SPACE_CHARACTER);
1678 for(int j = 0; j < option_elements_length; j++) {
1679 Element option_element = (Element) option_elements.item(j);
1680 if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1681 text.append(StaticStrings.MINUS_CHARACTER);
1682 text.append(option_element.getAttribute(NAME_ATTRIBUTE));
1683 String value_str = XMLTools.getValue(option_element);
1684 if(!show_extracted_namespace && value_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
1685 value_str = value_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
1686 }
1687 if(value_str.length() > 0) {
1688 text.append(SPACE_CHARACTER);
1689 if(value_str.indexOf(SPACE_CHARACTER) == -1) {
1690 text.append(value_str);
1691 }
1692 else {
1693 text.append(SPEECH_CHARACTER);
1694 text.append(value_str);
1695 text.append(SPEECH_CHARACTER);
1696 }
1697 }
1698 value_str = null;
1699 if(j < option_elements_length - 1) {
1700 text.append(SPACE_CHARACTER);
1701 }
1702 }
1703 option_element = null;
1704 }
1705 }
1706 option_elements = null;
1707 }
1708 return text.toString();
1709 }
1710
1711 private String searchtypeToString(Element command_element) {
1712 if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1713 StringBuffer text = new StringBuffer(SEARCHTYPE_STR);
1714 text.append(TAB_CHARACTER);
1715 NodeList search_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1716 int search_elements_length = search_elements.getLength();
1717 for(int i = 0; i < search_elements_length; i++) {
1718 Element search_element = (Element) search_elements.item(i);
1719 text.append(search_element.getAttribute(NAME_ATTRIBUTE));
1720 text.append(SPACE_CHARACTER);
1721 }
1722 return text.substring(0, text.length() - 1);
1723 }
1724 else {
1725 return null;
1726 }
1727 }
1728
1729 private String subcollectionToString(Element command_element, boolean show_extracted_namespace) {
1730 StringBuffer text = new StringBuffer(SUBCOLLECTION_STR);
1731 text.append(SPACE_CHARACTER);
1732 text.append(command_element.getAttribute(NAME_ATTRIBUTE));
1733 text.append(SPACE_CHARACTER);
1734 text.append(TAB_CHARACTER);
1735 text.append(SPEECH_CHARACTER);
1736 if(command_element.getAttribute(TYPE_ATTRIBUTE).equals(EXCLUDE_STR)) {
1737 text.append(EXCLAMATION_CHARACTER);
1738 }
1739 String content_str = command_element.getAttribute(CONTENT_ATTRIBUTE);
1740 if(!show_extracted_namespace && content_str.startsWith(StaticStrings.EXTRACTED_NAMESPACE)) {
1741 content_str = content_str.substring(StaticStrings.EXTRACTED_NAMESPACE.length());
1742 }
1743 text.append(content_str);
1744 content_str = null;
1745 text.append(SEPARATOR_CHARACTER);
1746 text.append(XMLTools.getValue(command_element));
1747 text.append(SEPARATOR_CHARACTER);
1748 String options_str = command_element.getAttribute(OPTIONS_ATTRIBUTE);
1749 if(options_str.length() > 0) {
1750 text.append(options_str);
1751 }
1752 options_str = null;
1753 text.append(SPEECH_CHARACTER);
1754 return text.toString();
1755 }
1756
1757 private String subcollectionDefaultIndexToString(Element command_element) {
1758 StringBuffer text = new StringBuffer(SUBCOLLECTION_DEFAULT_INDEX_STR);
1759 text.append(TAB_CHARACTER);
1760 NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1761 int content_elements_length = content_elements.getLength();
1762 for(int j = 0; j < content_elements_length; j++) {
1763 Element content_element = (Element) content_elements.item(j);
1764 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1765 if(j < content_elements_length - 1) {
1766 text.append(StaticStrings.COMMA_CHARACTER);
1767 }
1768 }
1769 return text.toString();
1770 }
1771
1772 private String subcollectionIndexesToString(Element command_element) {
1773 StringBuffer text = new StringBuffer(SUBCOLLECTION_INDEX_STR);
1774 text.append(TAB_CHARACTER);
1775 // Retrieve all of the subcollection index partitions
1776 NodeList subcollectionindex_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
1777 int subcollectionindex_elements_length = subcollectionindex_elements.getLength();
1778 if(subcollectionindex_elements_length == 0) {
1779 return null;
1780 }
1781 for(int j = 0; j < subcollectionindex_elements_length; j++) {
1782 Element subcollectionindex_element = (Element) subcollectionindex_elements.item(j);
1783 NodeList content_elements = subcollectionindex_element.getElementsByTagName(CONTENT_ELEMENT);
1784 int content_elements_length = content_elements.getLength();
1785 for(int k = 0; k < content_elements_length; k++) {
1786 Element content_element = (Element) content_elements.item(k);
1787 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1788 if(k < content_elements_length - 1) {
1789 text.append(StaticStrings.COMMA_CHARACTER);
1790 }
1791 }
1792 if(j < subcollectionindex_elements_length - 1) {
1793 text.append(SPACE_CHARACTER);
1794 }
1795 }
1796 return text.toString();
1797 }
1798
1799 private String supercollectionToString(Element command_element) {
1800 NodeList content_elements = command_element.getElementsByTagName(COLLECTION_ELEMENT);
1801 int content_elements_length = content_elements.getLength();
1802 if(content_elements_length > 1) {
1803 StringBuffer text = new StringBuffer(SUPERCOLLECTION_STR);
1804 text.append(TAB_CHARACTER);
1805 for(int j = 0; j < content_elements_length; j++) {
1806 Element content_element = (Element) content_elements.item(j);
1807 text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1808 if(j < content_elements_length - 1) {
1809 text.append(SPACE_CHARACTER);
1810 }
1811 }
1812 return text.toString();
1813 }
1814 return null;
1815 }
1816
1817 private String unknownToString(Element command_element) {
1818 return XMLTools.getValue(command_element);
1819 }
1820
1821 /** Write the text to the buffer. This is used so we don't have to worry about storing intermediate String values just so we can calaulate length and offset.
1822 * @param writer the BufferedWriter to which the str will be written
1823 * @param str the String to be written
1824 */
1825 private void write(BufferedWriter writer, String str)
1826 throws IOException {
1827 writer.write(str, 0, str.length());
1828 }
1829}
Note: See TracBrowser for help on using the repository browser.