source: gli/trunk/src/org/greenstone/gatherer/cdm/CollectCfgReadWrite.java@ 20450

Last change on this file since 20450 was 20450, checked in by kjdon, 15 years ago

removed some commented out code, did a bit of tidying, no longer treat the metadata option specially when parsing plugins - which is good, as they don't have a metadata element.

File size: 57.3 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Methods to read collect.cfg files into internal XML form, and write
9 * them back out again.
10 *
11 * Copyright (C) 1999 New Zealand Digital Library Project
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 *########################################################################
27 */
28package org.greenstone.gatherer.cdm;
29
30import java.io.BufferedReader;
31import java.io.File;
32import java.io.FileInputStream;
33import java.io.InputStream;
34import java.io.InputStreamReader;
35import java.io.Reader;
36
37import java.util.HashMap;
38import java.util.Iterator;
39import java.util.StringTokenizer;
40
41import org.greenstone.gatherer.DebugStream;
42import org.greenstone.gatherer.Configuration;
43import org.greenstone.gatherer.metadata.MetadataElement;
44import org.greenstone.gatherer.metadata.MetadataTools;
45import org.greenstone.gatherer.util.Codec;
46import org.greenstone.gatherer.util.XMLTools;
47import org.greenstone.gatherer.util.StaticStrings;
48import org.greenstone.gatherer.util.Utility;
49
50import org.w3c.dom.*;
51
52public class CollectCfgReadWrite {
53
54
55 static public String toString (Element command_element) {
56 String command_element_name = command_element.getNodeName ();
57 if(command_element_name.equals (StaticStrings.CLASSIFY_ELEMENT)) {
58 return classifyToString (command_element);
59 }
60 else if(command_element_name.equals (StaticStrings.FORMAT_ELEMENT)) {
61 return formatToString (command_element);
62 }
63 else if(command_element_name.equals (StaticStrings.INDEXES_ELEMENT)) {
64 return indexesToString (command_element);
65 }
66 else if(command_element_name.equals (StaticStrings.INDEX_DEFAULT_ELEMENT)) {
67 return indexDefaultToString (command_element);
68 }
69 else if(command_element_name.equals (StaticStrings.LANGUAGES_ELEMENT)) {
70 return languagesToString (command_element);
71 }
72 else if(command_element_name.equals (StaticStrings.LANGUAGE_DEFAULT_ELEMENT)) {
73 return languageDefaultToString (command_element);
74 }
75 else if (command_element_name.equals (StaticStrings.LANGUAGE_METADATA_ELEMENT)) {
76 return languageMetadataToString (command_element);
77 }
78 else if(command_element_name.equals (StaticStrings.INDEXOPTIONS_ELEMENT)) {
79 return indexOptionsToString (command_element);
80 }
81 else if(command_element_name.equals (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT)) {
82 return indexOptionDefaultToString (command_element);
83 }
84 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_ELEMENT)) {
85 return metadataToString (command_element);
86 }
87 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT)) {
88 return metadataToString (command_element);
89 }
90 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT)) {
91 return metadataToString (command_element);
92 }
93 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT)) {
94 return metadataToString (command_element);
95 }
96 else if (command_element_name.equals (StaticStrings.BUILDTYPE_ELEMENT)) {
97 return metadataToString (command_element);
98 }
99 else if(command_element_name.equals (StaticStrings.PLUGIN_ELEMENT)) {
100 return pluginToString (command_element);
101 }
102 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_ELEMENT)) {
103 return subcollectionToString (command_element);
104 }
105 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) {
106 return subcollectionDefaultIndexToString (command_element);
107 }
108 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT)) {
109 return subcollectionIndexesToString (command_element);
110 }
111 else if(command_element_name.equals (StaticStrings.SUPERCOLLECTION_ELEMENT)) {
112 return supercollectionToString (command_element);
113 }
114 else if(command_element_name.equals (StaticStrings.UNKNOWN_ELEMENT)) {
115 return unknownToString (command_element);
116 }
117 return "";
118 }
119
120 static private String classifyToString (Element command_element) {
121 StringBuffer text = new StringBuffer (StaticStrings.CLASSIFY_STR);
122 text.append (StaticStrings.TAB_CHARACTER);
123 text.append (command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE));
124 NodeList option_elements = command_element.getElementsByTagName (StaticStrings.OPTION_ELEMENT);
125 int option_elements_length = option_elements.getLength ();
126 for(int j = 0; j < option_elements_length; j++) {
127 Element option_element = (Element) option_elements.item (j);
128 if(option_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
129 text.append (StaticStrings.SPACE_CHARACTER);
130 text.append (StaticStrings.MINUS_CHARACTER);
131 text.append (option_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
132 String value_str = XMLTools.getValue (option_element);
133
134 if (value_str.length () > 0) {
135 text.append (StaticStrings.SPACE_CHARACTER);
136 if(value_str.indexOf (StaticStrings.SPACE_CHARACTER) != -1) {
137 // enclose in quotes
138 text.append(StaticStrings.SPEECH_CHARACTER);
139 text.append(value_str);
140 text.append(StaticStrings.SPEECH_CHARACTER);
141 } else {
142
143 text.append(value_str);
144 }
145 }
146
147 value_str = null;
148 }
149 option_element = null;
150 }
151 option_elements = null;
152 return text.toString ();
153 }
154
155 static private String formatToString (Element command_element) {
156 StringBuffer text = new StringBuffer (StaticStrings.FORMAT_STR);
157 text.append (StaticStrings.SPACE_CHARACTER);
158 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
159 text.append (StaticStrings.SPACE_CHARACTER);
160 String value_str = command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE);
161 if(value_str.length () != 0) {
162 text.append (value_str);
163 }
164 else {
165 // Remember to encode format string to Greenstone specification
166 value_str = Codec.transform (XMLTools.getValue (command_element), Codec.DOM_TO_GREENSTONE);
167 text.append (StaticStrings.SPEECH_CHARACTER);
168 text.append (value_str);
169 text.append (StaticStrings.SPEECH_CHARACTER);
170 }
171 value_str = null;
172 return text.toString ();
173 }
174
175 static private String indexesToString (Element command_element) {
176 boolean comment_only = false;
177 StringBuffer text = new StringBuffer ("");
178 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
179 text.append ("#");
180 comment_only = true;
181 }
182 text.append (StaticStrings.INDEX_STR);
183 text.append (StaticStrings.TAB_CHARACTER);
184 if(!comment_only) {
185 text.append (StaticStrings.TAB_CHARACTER);
186 }
187 NodeList index_elements = command_element.getElementsByTagName (StaticStrings.INDEX_ELEMENT);
188 if (index_elements.getLength () == 0) { // no indexes
189 return "";
190 }
191 // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
192 int index_elements_length = index_elements.getLength ();
193 for(int j = 0; j < index_elements_length; j++) {
194 Element index_element = (Element) index_elements.item (j);
195 String level_str = index_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE);
196 if(level_str.length () > 0) {
197 text.append (level_str);
198 text.append (StaticStrings.COLON_CHARACTER);
199 }
200 NodeList content_elements = index_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
201 int content_elements_length = content_elements.getLength ();
202 // Don't output anything if no indexes are set
203 if(content_elements_length == 0) {
204 return null;
205 }
206 for(int k = 0; k < content_elements_length; k++) {
207 Element content_element = (Element) content_elements.item (k);
208 String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
209 text.append (name_str);
210 name_str = null;
211 if(k < content_elements_length - 1) {
212 text.append (StaticStrings.COMMA_CHARACTER);
213 }
214 content_element = null;
215 }
216 if(j < index_elements_length - 1) {
217 text.append (StaticStrings.SPACE_CHARACTER);
218 }
219 content_elements = null;
220 index_element = null;
221 }
222 index_elements = null;
223 return text.toString ();
224 }
225
226 static private String indexDefaultToString (Element command_element) {
227 StringBuffer text = new StringBuffer ("");
228 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
229 text.append ("#");
230 }
231 text.append (StaticStrings.INDEX_DEFAULT_STR);
232 text.append (StaticStrings.TAB_CHARACTER);
233 if (!command_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE).equals ("")) {
234 text.append (command_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE));
235 text.append (StaticStrings.COLON_CHARACTER);
236 }
237 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
238 int content_elements_length = content_elements.getLength ();
239 for(int j = 0; j < content_elements_length; j++) {
240 Element content_element = (Element) content_elements.item (j);
241 String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
242 text.append (name_str);
243 name_str = null;
244 if(j < content_elements_length - 1) {
245 text.append (StaticStrings.COMMA_CHARACTER);
246 }
247 content_element = null;
248 }
249 content_elements = null;
250 return text.toString ();
251 }
252
253 static private String languagesToString (Element command_element) {
254 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGES_STR);
255 text.append (StaticStrings.TAB_CHARACTER);
256 // Retrieve all the languages and write them out in a space separated list
257 NodeList language_elements = command_element.getElementsByTagName (StaticStrings.LANGUAGE_ELEMENT);
258 int language_elements_length = language_elements.getLength ();
259 if(language_elements_length == 0) {
260 return null;
261 }
262 for(int j = 0; j < language_elements_length; j++) {
263 Element language_element = (Element) language_elements.item (j);
264 text.append (language_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
265 if(j < language_elements_length - 1) {
266 text.append (StaticStrings.SPACE_CHARACTER);
267 }
268 }
269 return text.toString ();
270 }
271
272 static private String languageDefaultToString (Element command_element) {
273 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGE_DEFAULT_STR);
274 text.append (StaticStrings.TAB_CHARACTER);
275 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
276 return text.toString ();
277 }
278
279 static private String languageMetadataToString (Element command_element) {
280 if (!command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
281 return "";
282 }
283 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGE_METADATA_STR);
284 text.append (StaticStrings.TAB_CHARACTER);
285 String name_str = command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
286 text.append (name_str);
287 return text.toString ();
288 }
289
290 static private String indexOptionsToString (Element command_element) {
291 StringBuffer text = new StringBuffer ("");
292 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
293 text.append ("#");
294 }
295 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
296 text.append (StaticStrings.TAB_CHARACTER);
297 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.INDEXOPTION_ELEMENT);
298 int content_elements_length = content_elements.getLength ();
299 // Don't output anything if no options are set.
300 if(content_elements_length == 0) {
301 return null;
302 }
303 for(int i = 0; i < content_elements_length; i++) {
304 Element content_element = (Element) content_elements.item (i);
305 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
306 text.append (StaticStrings.SPACE_CHARACTER);
307 }
308 return text.substring (0, text.length () - 1);
309 }
310
311 static private String indexOptionDefaultToString (Element command_element) {
312 // Don't bother if there is no value
313 if (command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE).equals ("")) {
314 return "";
315 }
316 StringBuffer text = new StringBuffer ("");
317 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
318 text.append ("#");
319 }
320 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
321 text.append (StaticStrings.TAB_CHARACTER);
322 text.append (command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE));
323 return text.toString ();
324 }
325
326 static private String metadataToString (Element command_element) {
327 // lets first check the value - if its empty, don't bother sticking it in the config file
328 String value_str = XMLTools.getValue (command_element);
329 if (value_str.equals ("")) {
330 return "";
331 }
332 boolean special = false;
333
334 StringBuffer text = new StringBuffer ("");
335 String name_str = command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
336 // If the name is one of the special four, we don't write the collectionmeta first. Note maintainer and buildtype are singled out for 'prittying' reasons.
337 if(name_str.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR)|| name_str.equals (StaticStrings.BUILDTYPE_STR) ) {
338 text.append (name_str);
339 text.append (StaticStrings.TAB_CHARACTER);
340 special = true;
341 }
342 else if (name_str.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR) || name_str.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR) ) {
343 text.append (name_str);
344 text.append (StaticStrings.TAB_CHARACTER);
345 text.append (StaticStrings.TAB_CHARACTER);
346 special = true;
347 }
348 else {
349 text.append (StaticStrings.COLLECTIONMETADATA_STR);
350 text.append (StaticStrings.TAB_CHARACTER);
351 text.append (name_str);
352 text.append (StaticStrings.SPACE_CHARACTER);
353 String language_str = command_element.getAttribute (StaticStrings.LANGUAGE_ATTRIBUTE);
354 text.append (StaticStrings.LBRACKET_CHARACTER);
355 text.append (StaticStrings.LANGUAGE_ARGUMENT);
356 text.append (language_str);
357 text.append (StaticStrings.RBRACKET_CHARACTER);
358 text.append (StaticStrings.SPACE_CHARACTER);
359 }
360 name_str = null;
361
362 // decode the value from XML to a form for config file
363 value_str = Codec.transform (value_str, Codec.DOM_TO_GREENSTONE);
364
365 // We don't wrap the email addresses in quotes, nor the other special metadata
366 if(special) {
367 text.append (value_str);
368 }
369 else {
370 text.append (StaticStrings.SPEECH_CHARACTER);
371 text.append (value_str);
372 text.append (StaticStrings.SPEECH_CHARACTER);
373 }
374 value_str = null;
375 return text.toString ();
376 }
377
378 static private String searchtypeToString (Element command_element) {
379 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
380 StringBuffer text = new StringBuffer (StaticStrings.SEARCHTYPE_STR);
381 text.append (StaticStrings.TAB_CHARACTER);
382 NodeList search_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
383 int search_elements_length = search_elements.getLength ();
384 for(int i = 0; i < search_elements_length; i++) {
385 Element search_element = (Element) search_elements.item (i);
386 text.append (search_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
387 text.append (StaticStrings.SPACE_CHARACTER);
388 }
389 return text.substring (0, text.length () - 1);
390 }
391 else {
392 return null;
393 }
394 }
395
396 static private String subcollectionToString (Element command_element) {
397 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_STR);
398 text.append (StaticStrings.SPACE_CHARACTER);
399 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
400 text.append (StaticStrings.SPACE_CHARACTER);
401 text.append (StaticStrings.TAB_CHARACTER);
402 text.append (StaticStrings.SPEECH_CHARACTER);
403 if(command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE).equals (StaticStrings.EXCLUDE_STR)) {
404 text.append (StaticStrings.EXCLAMATION_CHARACTER);
405 }
406 text.append (command_element.getAttribute (StaticStrings.CONTENT_ATTRIBUTE));
407 text.append (StaticStrings.SEPARATOR_CHARACTER);
408 text.append (XMLTools.getValue (command_element));
409 text.append (StaticStrings.SEPARATOR_CHARACTER);
410 String options_str = command_element.getAttribute (StaticStrings.OPTIONS_ATTRIBUTE);
411 if(options_str.length () > 0) {
412 text.append (options_str);
413 }
414 options_str = null;
415 text.append (StaticStrings.SPEECH_CHARACTER);
416 return text.toString ();
417 }
418
419 static private String subcollectionDefaultIndexToString (Element command_element) {
420 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_STR);
421 text.append (StaticStrings.TAB_CHARACTER);
422 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
423 int content_elements_length = content_elements.getLength ();
424 for(int j = 0; j < content_elements_length; j++) {
425 Element content_element = (Element) content_elements.item (j);
426 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
427 if(j < content_elements_length - 1) {
428 text.append (StaticStrings.COMMA_CHARACTER);
429 }
430 }
431 return text.toString ();
432 }
433
434 static private String subcollectionIndexesToString (Element command_element) {
435 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_INDEX_STR);
436 text.append (StaticStrings.TAB_CHARACTER);
437 // Retrieve all of the subcollection index partitions
438 NodeList subcollectionindex_elements = command_element.getElementsByTagName (StaticStrings.INDEX_ELEMENT);
439 int subcollectionindex_elements_length = subcollectionindex_elements.getLength ();
440 if(subcollectionindex_elements_length == 0) {
441 return null;
442 }
443 for(int j = 0; j < subcollectionindex_elements_length; j++) {
444 Element subcollectionindex_element = (Element) subcollectionindex_elements.item (j);
445 NodeList content_elements = subcollectionindex_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
446 int content_elements_length = content_elements.getLength ();
447 for(int k = 0; k < content_elements_length; k++) {
448 Element content_element = (Element) content_elements.item (k);
449 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
450 if(k < content_elements_length - 1) {
451 text.append (StaticStrings.COMMA_CHARACTER);
452 }
453 }
454 if(j < subcollectionindex_elements_length - 1) {
455 text.append (StaticStrings.SPACE_CHARACTER);
456 }
457 }
458 return text.toString ();
459 }
460
461 static private String supercollectionToString (Element command_element) {
462 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.COLLECTION_ELEMENT);
463 int content_elements_length = content_elements.getLength ();
464 if(content_elements_length > 1) {
465 StringBuffer text = new StringBuffer (StaticStrings.SUPERCOLLECTION_STR);
466 text.append (StaticStrings.TAB_CHARACTER);
467 for(int j = 0; j < content_elements_length; j++) {
468 Element content_element = (Element) content_elements.item (j);
469 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
470 if(j < content_elements_length - 1) {
471 text.append (StaticStrings.SPACE_CHARACTER);
472 }
473 }
474 return text.toString ();
475 }
476 return null;
477 }
478
479 static private String unknownToString (Element command_element) {
480 return XMLTools.getValue (command_element);
481 }
482
483
484 /** Parse a collect.cfg into a DOM model representation.
485 * note we are ignoring 2.39 compatibility now. */
486 static public String parse (File collect_cfg_file, Document document) {
487 // hack for pre 2.71 compatibility - we need to add in a
488 // build type if there is not one there
489 boolean search_types_parsed = false;
490 boolean build_types_parsed = false;
491 try {
492 StringBuffer saved_collect_cfg_string_buffer = new StringBuffer ();
493
494 Element collect_cfg_element = document.getDocumentElement ();
495 // Read in the file one command at a time.
496 InputStream istream = new FileInputStream (collect_cfg_file);
497 Reader in_reader = new InputStreamReader (istream, CollectionConfiguration.ENCODING);
498 BufferedReader in = new BufferedReader (in_reader);
499 String command_str = null;
500 while((command_str = in.readLine ()) != null) {
501 saved_collect_cfg_string_buffer.append (command_str + "\n");
502
503 boolean append_element = true;
504 Element command_element = null;
505 // A command may be broken over several lines.
506 command_str = command_str.trim ();
507 boolean eof = false;
508 while(!eof && command_str.endsWith (StaticStrings.NEWLINE_CHARACTER)) {
509 String next_line = in.readLine ();
510 if(next_line != null) {
511 next_line = next_line.trim ();
512 if(next_line.length () > 0) {
513 // Remove the new line character
514 command_str = command_str.substring (0, command_str.lastIndexOf (StaticStrings.NEWLINE_CHARACTER));
515 // And append the next line, which due to the test above must be non-zero length
516 command_str = command_str + next_line;
517 }
518 next_line = null;
519 }
520 // If we've reached the end of the file theres nothing more we can do
521 else {
522 eof = true;
523 }
524 }
525 // If there is still a new line character, then we remove it and hope for the best
526 if(command_str.endsWith (StaticStrings.NEWLINE_CHARACTER)) {
527 command_str = command_str.substring (0, command_str.lastIndexOf (StaticStrings.NEWLINE_CHARACTER));
528 }
529 // Now we've either got a command to parse...
530 if(command_str.length () != 0) {
531 // Start trying to figure out what it is
532 //StringTokenizer tokenizer = new StringTokenizer(command_str);
533 // Instead of a standard string tokenizer I'm going to use the new version of CommandTokenizer, which is not only smart enough to correctly notice speech marks and correctly parse them out, but now also takes the input stream so it can rebuild tokens that stretch over several lines.
534 CommandTokenizer tokenizer = new CommandTokenizer (command_str, in);
535 String command_type = tokenizer.nextToken ().toLowerCase ();
536 // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
537 if(command_element == null && command_type.equals (StaticStrings.CLASSIFY_STR)) {
538 command_element = parseClassify (command_str, document);
539 }
540 if(command_element == null && command_type.equals (StaticStrings.FORMAT_STR)) {
541 command_element = parseFormat (tokenizer, document); // Revised to handle multiple lines
542 }
543 if(command_element == null && (command_type.equals (StaticStrings.INDEX_STR) || command_type.equals (StaticStrings.COMMENTED_INDEXES_STR))) {
544 command_element = parseIndex (command_str, document);
545 }
546 if(command_element == null && (command_type.equals (StaticStrings.INDEX_DEFAULT_STR) || command_type.equals (StaticStrings.COMMENTED_INDEX_DEFAULT_STR))) {
547
548 command_element = parseIndexDefault (command_str, document);
549 }
550 if(command_element == null && command_type.equals (StaticStrings.LANGUAGES_STR)) {
551 command_element = parseLanguage (command_str, document);
552 }
553 if(command_element == null && command_type.equals (StaticStrings.LANGUAGE_DEFAULT_STR)) {
554 command_element = parseLanguageDefault (command_str, document);
555 }
556 if (command_element == null && command_type.equals (StaticStrings.LANGUAGE_METADATA_STR)) {
557 command_element = parseLanguageMetadata (command_str, document);
558 }
559 if(command_element == null && command_type.equals (StaticStrings.LEVELS_STR)) {
560 command_element = parseIndexOptions (command_str, document, StaticStrings.LEVELS_STR, true);
561 }
562 if (command_element == null && command_type.equals (StaticStrings.COMMENTED_LEVELS_STR)) {
563 command_element = parseIndexOptions (command_str, document, StaticStrings.LEVELS_STR, false);
564 }
565 if(command_element == null && command_type.equals (StaticStrings.LEVEL_DEFAULT_STR)) {
566 command_element = parseIndexOptionDefault (command_str, document, StaticStrings.LEVEL_DEFAULT_STR, true);
567 }
568 if(command_element == null && command_type.equals (StaticStrings.COMMENTED_LEVEL_DEFAULT_STR)) {
569 command_element = parseIndexOptionDefault (command_str, document, StaticStrings.LEVEL_DEFAULT_STR, false);
570 }
571 if (command_element == null && command_type.equals (StaticStrings.INDEXOPTIONS_STR)) {
572 command_element = parseIndexOptions (command_str, document, StaticStrings.INDEXOPTIONS_STR, true);
573 }
574 if (command_element == null && command_type.equals (StaticStrings.COMMENTED_INDEXOPTIONS_STR)) {
575 command_element = parseIndexOptions (command_str, document, StaticStrings.INDEXOPTIONS_STR, false);
576 }
577 if(command_element == null && command_type.equals (StaticStrings.COLLECTIONMETADATA_STR)) {
578 command_element = parseMetadata (tokenizer, document); // Revised to handle multiple lines
579 }
580 if(command_element == null && (command_type.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR) || command_type.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR) || command_type.equals (StaticStrings.BUILDTYPE_STR))) {
581 command_element = parseMetadataSpecial (command_str, document);
582 // pre 2.71 hack
583 if (command_type.equals (StaticStrings.BUILDTYPE_STR)) {
584 build_types_parsed = true;
585 }
586 }
587 if(command_element == null && command_type.equals (StaticStrings.PLUGIN_STR)) {
588 command_element = parsePlugin (command_str, document);
589 }
590 // leave here for backwards compatibility
591 if(command_element == null && command_type.equals (StaticStrings.SEARCHTYPE_STR)) {
592 command_element = parseSearchType (command_str, document);
593 // pre 2.71 hack
594 search_types_parsed = true;
595
596 }
597 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_STR)) {
598 command_element = parseSubCollection (command_str, document);
599 }
600 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_STR)) {
601 command_element = parseSubCollectionDefaultIndex (command_str, document);
602 }
603 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_INDEX_STR)) {
604 command_element = parseSubCollectionIndex (command_str, document);
605 }
606 if(command_element == null && (command_type.equals (StaticStrings.SUPERCOLLECTION_STR) || command_type.equals (StaticStrings.CCS_STR))) {
607 command_element = parseSuperCollection (command_str, document);
608 }
609 // Doesn't match any known type
610 command_type = null;
611 if(command_element == null) {
612 // No-one knows what to do with this command, so we create an Unknown command element
613 command_element = document.createElement (StaticStrings.UNKNOWN_ELEMENT);
614 XMLTools.setValue (command_element, command_str);
615 }
616 }
617 // Or an empty line to remember for later
618 else {
619 command_element = document.createElement (CollectionConfiguration.NEWLINE_ELEMENT);
620 }
621 // Now command element shouldn't be null so we append it to the collection config DOM, but only if we haven't been told not to add it
622 //if(append_element) {
623 collect_cfg_element.appendChild (command_element);
624 //}
625 }
626 if (!build_types_parsed) {
627 String buildtype_type = BuildTypeManager.BUILD_TYPE_MG;
628 if (search_types_parsed) {
629 buildtype_type = BuildTypeManager.BUILD_TYPE_MGPP;
630 }
631 Element command_element = parseMetadataSpecial (StaticStrings.BUILDTYPE_STR+" "+buildtype_type, document);
632 Node target_node = CollectionConfiguration.findInsertionPoint (command_element);
633 if(target_node != null) {
634 collect_cfg_element.insertBefore (command_element, target_node);
635 }
636 else {
637 collect_cfg_element.appendChild (command_element);
638 }
639
640 }
641 return saved_collect_cfg_string_buffer.toString();
642 }
643 catch(Exception exception) {
644 DebugStream.println ("Error in CollectionConfiguration.parse(java.io.File): " + exception);
645 DebugStream.printStackTrace (exception);
646 }
647
648 return null;
649 }
650
651
652 /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix.
653 * @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string
654 * @return a HashMap containing the arguments parsed
655 */
656 static public HashMap parseArguments (CommandTokenizer tokenizer) {
657 HashMap arguments = new HashMap ();
658 String name = null;
659 String value = null;
660 while(tokenizer.hasMoreTokens () || name != null) {
661 // First we retrieve a name if we need one.
662 if(name == null) {
663 name = tokenizer.nextToken ();
664 }
665 // Now we attempt to retrieve a value
666 if(tokenizer.hasMoreTokens ()) {
667 value = tokenizer.nextToken ();
668 // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop.
669 // The value is not a name if it contains a space character: it's a quoted value
670 if (value.startsWith(StaticStrings.MINUS_CHARACTER) && value.indexOf(StaticStrings.SPACE_CHARACTER) == -1) {
671 arguments.put (name, null);
672 name = value;
673 }
674 // Otherwise we have a typical name->value pair ready to go
675 else {
676 arguments.put (name, value);
677 name = null;
678 }
679 }
680 // Otherwise its a binary flag
681 else {
682 arguments.put (name, null);
683 name = null;
684 }
685 }
686 return arguments;
687 }
688
689 static private Element parseClassify (String command_str, Document document) {
690 Element command_element = null;
691 try {
692 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
693 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
694 if(tokenizer.countTokens () >= 2) { // Must support "classify Phind" (no args)
695 command_element = document.createElement (StaticStrings.CLASSIFY_ELEMENT);
696 // First token is classify
697 tokenizer.nextToken ();
698 // The next token is the classifier type
699 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, tokenizer.nextToken ());
700 // Now we parse out the remaining arguments into a hashmapping from name to value
701 HashMap arguments = parseArguments (tokenizer);
702 // Assign the arguments as Option elements
703 Iterator names = arguments.keySet ().iterator ();
704 while(names.hasNext ()) {
705 String name = (String) names.next ();
706 String value = (String) arguments.get (name); // Can be null
707 Element option_element = document.createElement (StaticStrings.OPTION_ELEMENT);
708 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name.substring (1));
709 if(value != null) {
710 // Remove any speech marks appended in strings containing whitespace
711 if(value.startsWith (StaticStrings.SPEECH_CHARACTER) && value.endsWith (StaticStrings.SPEECH_CHARACTER)) {
712 value = value.substring (1, value.length () - 1);
713 }
714 XMLTools.setValue (option_element, value);
715 }
716 option_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
717 command_element.appendChild (option_element);
718 option_element = null;
719 name = null;
720 value = null;
721 }
722 names = null;
723 arguments = null;
724 }
725 tokenizer = null;
726 }
727 catch(Exception error) {
728 }
729 return command_element;
730 }
731
732 static private Element parseFormat (CommandTokenizer tokenizer, Document document) {
733 Element command_element = null;
734 try {
735 command_element = document.createElement (StaticStrings.FORMAT_ELEMENT);
736 String name_str = tokenizer.nextToken ();
737 String value_str = tokenizer.nextToken ();
738 if(name_str != null && value_str != null) {
739 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
740 // If the value is true or false we add it as an attribute
741 if(value_str.equalsIgnoreCase (StaticStrings.TRUE_STR) || value_str.equalsIgnoreCase (StaticStrings.FALSE_STR)) {
742 command_element.setAttribute (StaticStrings.VALUE_ATTRIBUTE, value_str.toLowerCase ());
743 }
744 // Otherwise it gets added as a text node
745 else {
746 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
747 value_str = Codec.transform (value_str, Codec.GREENSTONE_TO_DOM);
748 XMLTools.setValue (command_element, value_str);
749 }
750 }
751 else {
752 command_element = null;
753 }
754 name_str = null;
755 value_str = null;
756 }
757 catch (Exception exception) {
758 DebugStream.printStackTrace (exception);
759 command_element = null;
760 }
761 return command_element;
762 }
763
764 static private Element parseIndex (String command_str, Document document) {
765 Element command_element = null;
766 try {
767 StringTokenizer tokenizer = new StringTokenizer (command_str);
768 String command = tokenizer.nextToken ();
769 command_element = document.createElement (StaticStrings.INDEXES_ELEMENT);
770 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (command.equals (StaticStrings.INDEX_STR) ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
771 command = null;
772 if(!tokenizer.hasMoreTokens ()) {
773
774 // there are no indexes
775 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.FALSE_STR);
776 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.FALSE_STR); // for now
777 tokenizer = null;
778 return command_element;
779 }
780
781 while(tokenizer.hasMoreTokens ()) {
782 Element index_element = document.createElement (StaticStrings.INDEX_ELEMENT);
783 String index_str = tokenizer.nextToken ();
784 // There are two types of index we have to consider. MG versions use "level:source,source" while MGPP versions use "source,source source"
785 if(index_str.indexOf (StaticStrings.COLON_CHARACTER) != -1) {
786 index_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE, index_str.substring (0, index_str.indexOf (StaticStrings.COLON_CHARACTER)));
787 index_str = index_str.substring (index_str.indexOf (StaticStrings.COLON_CHARACTER) + 1);
788 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.FALSE_STR);
789 }
790 else {
791 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.TRUE_STR);
792 }
793 StringTokenizer content_tokenizer = new StringTokenizer (index_str, StaticStrings.COMMA_CHARACTER);
794 while(content_tokenizer.hasMoreTokens ()) {
795 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
796 String content_str = content_tokenizer.nextToken ();
797 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
798 if(content_str.indexOf (StaticStrings.NS_SEP) == -1) {
799 if(content_str.equals (StaticStrings.TEXT_STR) || content_str.equals (StaticStrings.ALLFIELDS_STR) || content_str.equals(StaticStrings.METADATA_STR)) {
800 // Our special strings are OK.
801 }
802 else {
803 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
804 }
805 }
806 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_str);
807 index_element.appendChild (content_element);
808 content_element = null;
809 }
810 content_tokenizer = null;
811 index_str = null;
812 command_element.appendChild (index_element);
813 index_element = null;
814 }
815 tokenizer = null;
816 }
817 catch (Exception exception) {
818 exception.printStackTrace ();
819 }
820 return command_element;
821 }
822
823 static private Element parseIndexDefault (String command_str, Document document) {
824 Element command_element = null;
825 try {
826 StringTokenizer tokenizer = new StringTokenizer (command_str);
827 if(tokenizer.countTokens () >= 2) {
828 command_element = document.createElement (StaticStrings.INDEX_DEFAULT_ELEMENT);
829 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken ()).equals (StaticStrings.INDEX_DEFAULT_STR) ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
830 String index_str = tokenizer.nextToken ();
831 String level="";
832 if (index_str.indexOf (StaticStrings.COLON_CHARACTER) !=-1) {
833 level = index_str.substring (0, index_str.indexOf (StaticStrings.COLON_CHARACTER));
834 }
835
836 command_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE,level);
837
838 String content_str = index_str;
839
840 if (index_str.indexOf (StaticStrings.COLON_CHARACTER) !=-1) {
841 content_str = index_str.substring (index_str.indexOf (StaticStrings.COLON_CHARACTER) + 1);
842 }
843
844 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
845 while(content_tokenizer.hasMoreTokens ()) {
846 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
847 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
848 command_element.appendChild (content_element);
849 content_element = null;
850 }
851 content_tokenizer = null;
852 content_str = null;
853 content_str = null;
854 index_str = null;
855 }
856 tokenizer = null;
857 }
858 catch (Exception exception) {
859 }
860 return command_element;
861 }
862
863 static private Element parseLanguage (String command_str, Document document) {
864 Element command_element = null;
865 try {
866 StringTokenizer tokenizer = new StringTokenizer (command_str);
867 tokenizer.nextToken ();
868 if(tokenizer.hasMoreTokens ()) {
869 command_element = document.createElement (StaticStrings.LANGUAGES_ELEMENT);
870 while(tokenizer.hasMoreTokens ()) {
871 Element language_element = document.createElement (StaticStrings.LANGUAGE_ELEMENT);
872 language_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
873 command_element.appendChild (language_element);
874 language_element = null;
875 }
876 }
877 tokenizer = null;
878 }
879 catch (Exception exception) {
880 }
881 return command_element;
882 }
883
884 static private Element parseLanguageDefault (String command_str, Document document) {
885 Element command_element = null;
886 try {
887 StringTokenizer tokenizer = new StringTokenizer (command_str);
888 if(tokenizer.countTokens () >= 2) {
889 command_element = document.createElement (StaticStrings.LANGUAGE_DEFAULT_ELEMENT);
890 tokenizer.nextToken ();
891 String default_language_str = tokenizer.nextToken ();
892 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, default_language_str);
893 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
894 default_language_str = null;
895 }
896 tokenizer = null;
897 }
898 catch (Exception exception) {
899 }
900 return command_element;
901 }
902
903 static private Element parseLanguageMetadata (String command_str, Document document) {
904 Element command_element = null;
905 try {
906 StringTokenizer tokenizer = new StringTokenizer (command_str);
907 if(tokenizer.countTokens () >= 2) {
908 command_element = document.createElement (StaticStrings.LANGUAGE_METADATA_ELEMENT);
909 tokenizer.nextToken ();
910 String language_metadata_str = tokenizer.nextToken ();
911 if (language_metadata_str.indexOf (StaticStrings.NS_SEP) == -1) {
912 language_metadata_str = StaticStrings.EXTRACTED_NAMESPACE + language_metadata_str;
913 }
914 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, language_metadata_str);
915 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
916 language_metadata_str = null;
917 }
918 tokenizer = null;
919
920 }
921 catch (Exception exception) {
922 }
923 return command_element;
924 }
925
926 static private Element parseIndexOptions (String command_str, Document document, String type, boolean assigned) {
927 Element command_element = null;
928 try {
929 StringTokenizer tokenizer = new StringTokenizer (command_str);
930 // First token is command type
931 String command = tokenizer.nextToken ();
932 if(tokenizer.hasMoreTokens ()) {
933 command_element = document.createElement (StaticStrings.INDEXOPTIONS_ELEMENT);
934 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE,type);
935 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (assigned ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
936 while(tokenizer.hasMoreTokens ()) {
937 Element option_element = document.createElement (StaticStrings.INDEXOPTION_ELEMENT);
938 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
939 command_element.appendChild (option_element);
940 option_element = null;
941 }
942 }
943 command = null;
944 }
945 catch(Exception exception) {
946 }
947 return command_element;
948 }
949
950 static private Element parseIndexOptionDefault (String command_str, Document document, String type, boolean assigned) {
951 Element command_element = null;
952 try {
953 StringTokenizer tokenizer = new StringTokenizer (command_str);
954 // First token is command type
955 String command = tokenizer.nextToken ();
956 if(tokenizer.hasMoreTokens ()) {
957 command_element = document.createElement (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT);
958 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (assigned ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR)); // is it commented out or not?
959 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, type);
960 command_element.setAttribute (StaticStrings.VALUE_ATTRIBUTE, tokenizer.nextToken ());
961 }
962
963 tokenizer = null;
964 }
965 catch (Exception exception) {
966 }
967 return command_element;
968 }
969
970 static private Element parseMetadata (CommandTokenizer tokenizer, Document document) {
971 Element command_element = null;
972 try {
973 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_ELEMENT);
974 String name_str = tokenizer.nextToken ();
975 String value_str = tokenizer.nextToken ();
976 if(name_str != null && value_str != null) {
977 String language_str = Configuration.getLanguage ();
978 // Check if the value string is actually a language string
979 if(value_str.startsWith (StaticStrings.LBRACKET_CHARACTER) && value_str.endsWith (StaticStrings.RBRACKET_CHARACTER)) {
980 language_str = value_str.substring (value_str.indexOf (StaticStrings.LANGUAGE_ARGUMENT) + 2, value_str.length () - 1);
981 value_str = tokenizer.nextToken ();
982 }
983 if(value_str != null) {
984 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
985 value_str = Codec.transform (value_str, Codec.GREENSTONE_TO_DOM);
986 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
987 command_element.setAttribute (StaticStrings.LANGUAGE_ATTRIBUTE, language_str);
988 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
989 XMLTools.setValue (command_element, value_str);
990 }
991 else {
992 command_element = null;
993 }
994 language_str = null;
995 }
996 else {
997 command_element = null;
998 }
999 name_str = null;
1000 value_str = null;
1001 }
1002 catch (Exception exception) {
1003 DebugStream.printStackTrace (exception);
1004 command_element = null;
1005 }
1006 return command_element;
1007 }
1008
1009 static private Element parseMetadataSpecial (String command_str, Document document) {
1010 Element command_element = null;
1011 try {
1012 StringTokenizer tokenizer = new StringTokenizer (command_str);
1013 if(tokenizer.countTokens () >= 2) {
1014 String name_str = tokenizer.nextToken ();
1015 String value_str = tokenizer.nextToken ();
1016 if (name_str.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR)) {
1017 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT);
1018 }
1019 else if(name_str.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR)) {
1020 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT);
1021 }
1022 else if(name_str.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR)) {
1023 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT);
1024 }
1025 else if (name_str.equals (StaticStrings.BUILDTYPE_STR)) {
1026 command_element = document.createElement (StaticStrings.BUILDTYPE_ELEMENT);
1027 }
1028 if(command_element != null) {
1029 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
1030 command_element.setAttribute (StaticStrings.LANGUAGE_ATTRIBUTE, StaticStrings.ENGLISH_LANGUAGE_STR);
1031 command_element.setAttribute (StaticStrings.SPECIAL_ATTRIBUTE, StaticStrings.TRUE_STR);
1032 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1033 if(value_str.startsWith (StaticStrings.SPEECH_CHARACTER) && value_str.endsWith (StaticStrings.SPEECH_CHARACTER)) {
1034 value_str = value_str.substring (1, value_str.length () - 1);
1035 }
1036 XMLTools.setValue (command_element, value_str);
1037 }
1038 value_str = null;
1039 name_str = null;
1040 }
1041 tokenizer = null;
1042 }
1043 catch (Exception exception) {
1044 }
1045 return command_element;
1046 }
1047
1048 static private Element parsePlugin (String command_str, Document document) {
1049 Element command_element = null;
1050 try {
1051 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
1052 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1053 if(tokenizer.countTokens () >= 2) {
1054 command_element = document.createElement (StaticStrings.PLUGIN_ELEMENT);
1055 // First token is plugin
1056 tokenizer.nextToken ();
1057 // The next token is the type
1058 String type = tokenizer.nextToken ();
1059 type = Utility.ensureNewPluginName(type);
1060 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, type);
1061 // Now we parse out the remaining arguments into a hashmapping from name to value
1062 HashMap arguments = parseArguments (tokenizer);
1063 // also watch out for the deprecated -use_metadata_files option to RecPlug and remove it
1064 Iterator names = arguments.keySet ().iterator ();
1065 while(names.hasNext ()) {
1066 String name = (String) names.next ();
1067 String value = (String) arguments.get (name); // Can be null
1068
1069 if(type.equals (StaticStrings.RECPLUG_STR) && name.substring (1).equals (StaticStrings.USE_METADATA_FILES_ARGUMENT)) {
1070 continue; // ignore this option
1071 }
1072 Element option_element = document.createElement (StaticStrings.OPTION_ELEMENT);
1073 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name.substring (1));
1074 option_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1075 if(value != null) {
1076 // Remove any speech marks appended in strings containing whitespace
1077 if(value.startsWith (StaticStrings.SPEECH_CHARACTER) && value.endsWith (StaticStrings.SPEECH_CHARACTER)) {
1078 value = value.substring (1, value.length () - 1);
1079 }
1080 XMLTools.setValue (option_element, value);
1081 }
1082 command_element.appendChild (option_element);
1083 option_element = null;
1084 name = null;
1085 value = null;
1086 }
1087
1088 type = null;
1089 names = null;
1090 arguments = null;
1091 }
1092 tokenizer = null;
1093 }
1094 catch(Exception exception) {
1095 // This catch clause had been left empty. If this is deliberate then
1096 // we should have a comment here explaining why there is no need to
1097 // print anything out. Am assuming this is mistake for now, and
1098 // have added in a call to printStackTrace()
1099 System.err.println("Malformed plugin statement");
1100 exception.printStackTrace();
1101 }
1102 return command_element;
1103 }
1104
1105 /* search types are now handled as formats - leave this here to convert in case we have an old config file */
1106 static private Element parseSearchType (String command_str, Document document) {
1107 Element command_element = null;
1108 try {
1109 StringTokenizer tokenizer = new StringTokenizer (command_str);
1110 // First token is command type (searchtype)
1111 tokenizer.nextToken ();
1112 if(tokenizer.hasMoreTokens ()) {
1113 command_element = document.createElement (StaticStrings.FORMAT_ELEMENT);
1114 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, "SearchTypes");
1115 String value = tokenizer.nextToken ();
1116 while(tokenizer.hasMoreTokens ()) {
1117 value += ","+tokenizer.nextToken ();
1118 }
1119 value = Codec.transform (value, Codec.GREENSTONE_TO_DOM);
1120 XMLTools.setValue (command_element, value);
1121 }
1122 }
1123 catch(Exception exception) {
1124 }
1125 return command_element;
1126 }
1127
1128 static private Element parseSubCollection (String command_str, Document document) {
1129 Element command_element = null;
1130 try {
1131 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
1132 if(tokenizer.countTokens () >= 3) {
1133 command_element = document.createElement (StaticStrings.SUBCOLLECTION_ELEMENT);
1134 // First token is command type
1135 tokenizer.nextToken ();
1136 // Then subcollection identifier
1137 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
1138 // Then finally the pattern used to build the subcollection partition
1139 String full_pattern_str = tokenizer.nextToken ();
1140 // Set inclusion/exclusion flag and remove any exclamation mark
1141 boolean exclusion = full_pattern_str.startsWith (StaticStrings.EXCLAMATION_CHARACTER);
1142 if (exclusion) {
1143 full_pattern_str = full_pattern_str.substring (1, full_pattern_str.length ());
1144 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, StaticStrings.EXCLUDE_STR);
1145 }
1146 else {
1147 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, StaticStrings.INCLUDE_STR);
1148 }
1149
1150 // Let's make sure it is a valid Greenstone configuration line
1151 String[] results = full_pattern_str.split("\\" + StaticStrings.SEPARATOR_CHARACTER, 3);
1152
1153 if (results.length >= 2) {
1154 String content_str = results[0];
1155 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1156 if (!content_str.equals (StaticStrings.FILENAME_STR) && content_str.indexOf (StaticStrings.NS_SEP) == -1) {
1157 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
1158 }
1159 command_element.setAttribute (StaticStrings.CONTENT_ATTRIBUTE, content_str);
1160 XMLTools.setValue (command_element, results[1]);
1161 if (results.length >= 3) {
1162 command_element.setAttribute (StaticStrings.OPTIONS_ATTRIBUTE, results[2]);
1163 }
1164 }
1165 }
1166 }
1167 catch(Exception exception) {
1168 exception.printStackTrace ();
1169 }
1170 return command_element;
1171 }
1172
1173 static private Element parseSubCollectionDefaultIndex (String command_str, Document document) {
1174 Element command_element = null;
1175 try {
1176 StringTokenizer tokenizer = new StringTokenizer (command_str);
1177 if(tokenizer.countTokens () == 2) {
1178 command_element = document.createElement (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT);
1179 tokenizer.nextToken ();
1180 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1181 String content_str = tokenizer.nextToken ();
1182 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
1183 while(content_tokenizer.hasMoreTokens ()) {
1184 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
1185 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
1186 command_element.appendChild (content_element);
1187 content_element = null;
1188 }
1189 content_tokenizer = null;
1190 content_str = null;
1191 }
1192 tokenizer = null;
1193 }
1194 catch(Exception exception) {
1195 }
1196 return command_element;
1197 }
1198
1199 static private Element parseSubCollectionIndex (String command_str, Document document) {
1200 Element command_element = null;
1201 try {
1202 StringTokenizer tokenizer = new StringTokenizer (command_str);
1203 tokenizer.nextToken ();
1204 if(tokenizer.hasMoreTokens ()) {
1205 command_element = document.createElement (StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT);
1206 }
1207 while(tokenizer.hasMoreTokens ()) {
1208 Element subcollectionindex_element = document.createElement (StaticStrings.INDEX_ELEMENT);
1209 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1210 String content_str = tokenizer.nextToken ();
1211 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
1212 while(content_tokenizer.hasMoreTokens ()) {
1213 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
1214 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
1215 subcollectionindex_element.appendChild (content_element);
1216 content_element = null;
1217 }
1218 content_tokenizer = null;
1219 content_str = null;
1220 command_element.appendChild (subcollectionindex_element);
1221 subcollectionindex_element = null;
1222 }
1223 tokenizer = null;
1224 }
1225 catch (Exception exception) {
1226 }
1227 return command_element;
1228 }
1229
1230 static private Element parseSuperCollection (String command_str, Document document) {
1231 Element command_element = null;
1232 try {
1233 StringTokenizer tokenizer = new StringTokenizer (command_str);
1234 if(tokenizer.countTokens () >= 3) {
1235 command_element = document.createElement (StaticStrings.SUPERCOLLECTION_ELEMENT);
1236 tokenizer.nextToken ();
1237 while(tokenizer.hasMoreTokens ()) {
1238 Element collection_element = document.createElement (StaticStrings.COLLECTION_ELEMENT);
1239 collection_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
1240 command_element.appendChild (collection_element);
1241 collection_element = null;
1242 }
1243 }
1244 tokenizer = null;
1245 }
1246 catch(Exception exception) {
1247 }
1248 return command_element;
1249 }
1250
1251 static private String pluginToString (Element command_element) {
1252 if(command_element.getAttribute (StaticStrings.SEPARATOR_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
1253 return "";
1254 }
1255 StringBuffer text = new StringBuffer (StaticStrings.PLUGIN_STR);
1256 text.append (StaticStrings.TAB_CHARACTER);
1257 text.append (command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE));
1258 // Retrieve, and output, the arguments
1259 NodeList option_elements = command_element.getElementsByTagName (StaticStrings.OPTION_ELEMENT);
1260 int option_elements_length = option_elements.getLength ();
1261 if(option_elements_length > 0) {
1262 for(int j = 0; j < option_elements_length; j++) {
1263 Element option_element = (Element) option_elements.item (j);
1264 if(option_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
1265 text.append (StaticStrings.SPACE_CHARACTER);
1266 text.append (StaticStrings.MINUS_CHARACTER);
1267 text.append (option_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
1268 String value_str = XMLTools.getValue (option_element);
1269 if (value_str.length () > 0) {
1270 text.append (StaticStrings.SPACE_CHARACTER);
1271 if(value_str.indexOf (StaticStrings.SPACE_CHARACTER) != -1) {
1272 // enclose in quotes
1273 text.append(StaticStrings.SPEECH_CHARACTER);
1274 text.append(value_str);
1275 text.append(StaticStrings.SPEECH_CHARACTER);
1276 } else {
1277
1278 text.append(value_str);
1279 }
1280 }
1281
1282 value_str = null;
1283 }
1284 option_element = null;
1285 }
1286 }
1287 option_elements = null;
1288
1289 return text.toString ();
1290 }
1291
1292 static public String generateStringVersion(Document document) {
1293
1294 StringBuffer collect_cfg_string_buffer = new StringBuffer ();
1295 NodeList command_elements = document.getDocumentElement ().getChildNodes ();
1296 boolean just_wrote_blank_line = false; // Prevent two or more blank lines in a row
1297 for (int i = 0; i < command_elements.getLength (); i++) {
1298 Node command_node = command_elements.item (i);
1299 if (!(command_node instanceof Element)) {
1300 // We're only interested in Elements
1301 continue;
1302 }
1303 Element command_element = (Element) command_node;
1304
1305 // Handle NewLine elements (blank lines)
1306 if (command_element.getNodeName ().equals (CollectionConfiguration.NEWLINE_ELEMENT) && !just_wrote_blank_line) {
1307 collect_cfg_string_buffer.append ("\n");
1308 just_wrote_blank_line = true;
1309 }
1310
1311 // Anything else we write to file, but only if it has been assigned, except for index and level commands
1312 // (which just get commented out if unassigned -- a side effect of MG & MGPP compatibility)
1313 else if (!command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR) || command_element.getNodeName ().equals (StaticStrings.INDEXES_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEX_DEFAULT_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEXOPTIONS_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT)) {
1314 String command = toString(command_element);
1315
1316 if (command != null && command.length ()> 0 ) {
1317 collect_cfg_string_buffer.append (command + "\n");
1318 just_wrote_blank_line = false;
1319 }
1320 }
1321 }
1322
1323 return collect_cfg_string_buffer.toString ();
1324 }
1325
1326
1327}
Note: See TracBrowser for help on using the repository browser.