source: main/trunk/gli/src/org/greenstone/gatherer/cdm/CollectCfgReadWrite.java@ 36194

Last change on this file since 36194 was 36194, checked in by kjdon, 2 years ago

handle searchmeta elements. Also make dot optional when writing out these metadata

File size: 58.1 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Methods to read collect.cfg files into internal XML form, and write
9 * them back out again.
10 *
11 * Copyright (C) 1999 New Zealand Digital Library Project
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 *########################################################################
27 */
28package org.greenstone.gatherer.cdm;
29
30import java.io.BufferedReader;
31import java.io.File;
32import java.io.FileInputStream;
33import java.io.InputStream;
34import java.io.InputStreamReader;
35import java.io.Reader;
36
37import java.util.HashMap;
38import java.util.Iterator;
39import java.util.StringTokenizer;
40
41import org.greenstone.gatherer.DebugStream;
42import org.greenstone.gatherer.Configuration;
43import org.greenstone.gatherer.metadata.MetadataElement;
44import org.greenstone.gatherer.metadata.MetadataTools;
45import org.greenstone.gatherer.util.Codec;
46import org.greenstone.gatherer.util.XMLTools;
47import org.greenstone.gatherer.util.StaticStrings;
48import org.greenstone.gatherer.util.Utility;
49
50import org.w3c.dom.*;
51
52public class CollectCfgReadWrite {
53
54
55 static public String toString (Element command_element) {
56 String command_element_name = command_element.getNodeName ();
57 if(command_element_name.equals (StaticStrings.CLASSIFY_ELEMENT)) {
58 return classifyToString (command_element);
59 }
60 else if(command_element_name.equals (StaticStrings.FORMAT_ELEMENT)) {
61 return formatToString (command_element);
62 }
63 else if(command_element_name.equals (StaticStrings.INDEXES_ELEMENT)) {
64 return indexesToString (command_element);
65 }
66 else if(command_element_name.equals (StaticStrings.INDEX_DEFAULT_ELEMENT)) {
67 return indexDefaultToString (command_element);
68 }
69 else if(command_element_name.equals (StaticStrings.LANGUAGES_ELEMENT)) {
70 return languagesToString (command_element);
71 }
72 else if(command_element_name.equals (StaticStrings.LANGUAGE_DEFAULT_ELEMENT)) {
73 return languageDefaultToString (command_element);
74 }
75 else if (command_element_name.equals (StaticStrings.LANGUAGE_METADATA_ELEMENT)) {
76 return languageMetadataToString (command_element);
77 }
78 else if(command_element_name.equals (StaticStrings.INDEXOPTIONS_ELEMENT)) {
79 return indexOptionsToString (command_element);
80 }
81 else if(command_element_name.equals (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT)) {
82 return indexOptionDefaultToString (command_element);
83 }
84 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_ELEMENT)) {
85 return metadataToString (command_element);
86 }
87 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT)) {
88 return metadataToString (command_element);
89 }
90 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT)) {
91 return metadataToString (command_element);
92 }
93 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT)) {
94 return metadataToString (command_element);
95 }
96 else if (command_element_name.equals (StaticStrings.SEARCHMETADATA_ELEMENT)) {
97 return metadataToString( command_element, true);
98 }
99 else if (command_element_name.equals (StaticStrings.BUILDTYPE_ELEMENT)) {
100 return metadataToString (command_element);
101 }
102 else if (command_element_name.equals (StaticStrings.DATABASETYPE_ELEMENT)) {
103 return metadataToString (command_element);
104 }
105 else if(command_element_name.equals (StaticStrings.PLUGIN_ELEMENT)) {
106 return pluginToString (command_element);
107 }
108 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_ELEMENT)) {
109 return subcollectionToString (command_element);
110 }
111 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) {
112 return subcollectionDefaultIndexToString (command_element);
113 }
114 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT)) {
115 return subcollectionIndexesToString (command_element);
116 }
117 else if(command_element_name.equals (StaticStrings.SUPERCOLLECTION_ELEMENT)) {
118 return supercollectionToString (command_element);
119 }
120 else if(command_element_name.equals (StaticStrings.UNKNOWN_ELEMENT)) {
121 return unknownToString (command_element);
122 }
123 return "";
124 }
125
126 static private String classifyToString (Element command_element) {
127 StringBuffer text = new StringBuffer (StaticStrings.CLASSIFY_STR);
128 text.append (StaticStrings.TAB_CHARACTER);
129 text.append (command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE));
130 NodeList option_elements = command_element.getElementsByTagName (StaticStrings.OPTION_ELEMENT);
131 int option_elements_length = option_elements.getLength ();
132 for(int j = 0; j < option_elements_length; j++) {
133 Element option_element = (Element) option_elements.item (j);
134 if(option_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
135 text.append (StaticStrings.SPACE_CHARACTER);
136 text.append (StaticStrings.MINUS_CHARACTER);
137 text.append (option_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
138 String value_str = XMLTools.getValue (option_element);
139
140 if (value_str.length () > 0) {
141 text.append (StaticStrings.SPACE_CHARACTER);
142 if(value_str.indexOf (StaticStrings.SPACE_CHARACTER) != -1) {
143 // enclose in quotes
144 text.append(StaticStrings.SPEECH_CHARACTER);
145 text.append(value_str);
146 text.append(StaticStrings.SPEECH_CHARACTER);
147 } else {
148
149 text.append(value_str);
150 }
151 }
152
153 value_str = null;
154 }
155 option_element = null;
156 }
157 option_elements = null;
158 return text.toString ();
159 }
160
161 static private String formatToString (Element command_element) {
162 StringBuffer text = new StringBuffer (StaticStrings.FORMAT_STR);
163 text.append (StaticStrings.SPACE_CHARACTER);
164 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
165 text.append (StaticStrings.SPACE_CHARACTER);
166 String value_str = command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE);
167 if(value_str.length () != 0) {
168 text.append (value_str);
169 }
170 else {
171 // Remember to encode format string to Greenstone specification
172 value_str = Codec.transform (XMLTools.getValue (command_element), Codec.DOM_TO_GREENSTONE);
173 text.append (StaticStrings.SPEECH_CHARACTER);
174 text.append (value_str);
175 text.append (StaticStrings.SPEECH_CHARACTER);
176 }
177 value_str = null;
178 return text.toString ();
179 }
180
181 static private String indexesToString (Element command_element) {
182 boolean comment_only = false;
183 StringBuffer text = new StringBuffer ("");
184 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
185 text.append ("#");
186 comment_only = true;
187 }
188 text.append (StaticStrings.INDEX_STR);
189 text.append (StaticStrings.TAB_CHARACTER);
190 if(!comment_only) {
191 text.append (StaticStrings.TAB_CHARACTER);
192 }
193 NodeList index_elements = command_element.getElementsByTagName (StaticStrings.INDEX_ELEMENT);
194 if (index_elements.getLength () == 0) { // no indexes
195 return "";
196 }
197 // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
198 int index_elements_length = index_elements.getLength ();
199 for(int j = 0; j < index_elements_length; j++) {
200 Element index_element = (Element) index_elements.item (j);
201 String level_str = index_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE);
202 if(level_str.length () > 0) {
203 text.append (level_str);
204 text.append (StaticStrings.COLON_CHARACTER);
205 }
206 NodeList content_elements = index_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
207 int content_elements_length = content_elements.getLength ();
208 // Don't output anything if no indexes are set
209 if(content_elements_length == 0) {
210 return null;
211 }
212 for(int k = 0; k < content_elements_length; k++) {
213 Element content_element = (Element) content_elements.item (k);
214 String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
215 text.append (name_str);
216 name_str = null;
217 if(k < content_elements_length - 1) {
218 text.append (StaticStrings.COMMA_CHARACTER);
219 }
220 content_element = null;
221 }
222 if(j < index_elements_length - 1) {
223 text.append (StaticStrings.SPACE_CHARACTER);
224 }
225 content_elements = null;
226 index_element = null;
227 }
228 index_elements = null;
229 return text.toString ();
230 }
231
232 static private String indexDefaultToString (Element command_element) {
233 StringBuffer text = new StringBuffer ("");
234 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
235 text.append ("#");
236 }
237 text.append (StaticStrings.INDEX_DEFAULT_STR);
238 text.append (StaticStrings.TAB_CHARACTER);
239 if (!command_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE).equals ("")) {
240 text.append (command_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE));
241 text.append (StaticStrings.COLON_CHARACTER);
242 }
243 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
244 int content_elements_length = content_elements.getLength ();
245 for(int j = 0; j < content_elements_length; j++) {
246 Element content_element = (Element) content_elements.item (j);
247 String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
248 text.append (name_str);
249 name_str = null;
250 if(j < content_elements_length - 1) {
251 text.append (StaticStrings.COMMA_CHARACTER);
252 }
253 content_element = null;
254 }
255 content_elements = null;
256 return text.toString ();
257 }
258
259 static private String languagesToString (Element command_element) {
260 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGES_STR);
261 text.append (StaticStrings.TAB_CHARACTER);
262 // Retrieve all the languages and write them out in a space separated list
263 NodeList language_elements = command_element.getElementsByTagName (StaticStrings.LANGUAGE_ELEMENT);
264 int language_elements_length = language_elements.getLength ();
265 if(language_elements_length == 0) {
266 return null;
267 }
268 for(int j = 0; j < language_elements_length; j++) {
269 Element language_element = (Element) language_elements.item (j);
270 text.append (language_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
271 if(j < language_elements_length - 1) {
272 text.append (StaticStrings.SPACE_CHARACTER);
273 }
274 }
275 return text.toString ();
276 }
277
278 static private String languageDefaultToString (Element command_element) {
279 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGE_DEFAULT_STR);
280 text.append (StaticStrings.TAB_CHARACTER);
281 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
282 return text.toString ();
283 }
284
285 static private String languageMetadataToString (Element command_element) {
286 if (!command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
287 return "";
288 }
289 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGE_METADATA_STR);
290 text.append (StaticStrings.TAB_CHARACTER);
291 String name_str = command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
292 text.append (name_str);
293 return text.toString ();
294 }
295
296 static private String indexOptionsToString (Element command_element) {
297 StringBuffer text = new StringBuffer ("");
298 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
299 text.append ("#");
300 }
301 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
302 text.append (StaticStrings.TAB_CHARACTER);
303 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.INDEXOPTION_ELEMENT);
304 int content_elements_length = content_elements.getLength ();
305 // Don't output anything if no options are set.
306 if(content_elements_length == 0) {
307 return null;
308 }
309 for(int i = 0; i < content_elements_length; i++) {
310 Element content_element = (Element) content_elements.item (i);
311 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
312 text.append (StaticStrings.SPACE_CHARACTER);
313 }
314 return text.substring (0, text.length () - 1);
315 }
316
317 static private String indexOptionDefaultToString (Element command_element) {
318 // Don't bother if there is no value
319 if (command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE).equals ("")) {
320 return "";
321 }
322 StringBuffer text = new StringBuffer ("");
323 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
324 text.append ("#");
325 }
326 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
327 text.append (StaticStrings.TAB_CHARACTER);
328 text.append (command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE));
329 return text.toString ();
330 }
331
332 static private String metadataToString(Element command_element) {
333 return metadataToString(command_element, false);
334 }
335 static private String metadataToString (Element command_element, boolean use_dot) {
336 // lets first check the value - if its empty, don't bother sticking it in the config file
337 String value_str = XMLTools.getValue (command_element);
338 if (value_str.equals ("")) {
339 return "";
340 }
341 boolean special = false;
342
343 StringBuffer text = new StringBuffer ("");
344 String name_str = command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
345 // If the name is one of the special four, we don't write the collectionmeta first. Note maintainer and buildtype are singled out for 'prittying' reasons.
346 if(name_str.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR)|| name_str.equals (StaticStrings.BUILDTYPE_STR) || name_str.equals (StaticStrings.DATABASETYPE_STR)) {
347 text.append (name_str);
348 text.append (StaticStrings.TAB_CHARACTER);
349 special = true;
350 }
351 else if (name_str.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR) || name_str.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR) ) {
352 text.append (name_str);
353 text.append (StaticStrings.TAB_CHARACTER);
354 text.append (StaticStrings.TAB_CHARACTER);
355 special = true;
356 }
357 else {
358 text.append (StaticStrings.COLLECTIONMETADATA_STR);
359 text.append (StaticStrings.TAB_CHARACTER);
360 if (use_dot) {
361 text.append(StaticStrings.STOP_CHARACTER);
362 }
363 text.append (name_str);
364 text.append (StaticStrings.SPACE_CHARACTER);
365 String language_str = command_element.getAttribute (StaticStrings.LANGUAGE_ATTRIBUTE);
366 text.append (StaticStrings.LBRACKET_CHARACTER);
367 text.append (StaticStrings.LANGUAGE_ARGUMENT);
368 text.append (language_str);
369 text.append (StaticStrings.RBRACKET_CHARACTER);
370 text.append (StaticStrings.SPACE_CHARACTER);
371 }
372 name_str = null;
373
374 // decode the value from XML to a form for config file
375 value_str = Codec.transform (value_str, Codec.DOM_TO_GREENSTONE);
376
377 // We don't wrap the email addresses in quotes, nor the other special metadata
378 if(special) {
379 text.append (value_str);
380 }
381 else {
382 text.append (StaticStrings.SPEECH_CHARACTER);
383 text.append (value_str);
384 text.append (StaticStrings.SPEECH_CHARACTER);
385 }
386 value_str = null;
387 return text.toString ();
388 }
389
390 static private String searchtypeToString (Element command_element) {
391 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
392 StringBuffer text = new StringBuffer (StaticStrings.SEARCHTYPE_STR);
393 text.append (StaticStrings.TAB_CHARACTER);
394 NodeList search_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
395 int search_elements_length = search_elements.getLength ();
396 for(int i = 0; i < search_elements_length; i++) {
397 Element search_element = (Element) search_elements.item (i);
398 text.append (search_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
399 text.append (StaticStrings.SPACE_CHARACTER);
400 }
401 return text.substring (0, text.length () - 1);
402 }
403 else {
404 return null;
405 }
406 }
407
408 static private String subcollectionToString (Element command_element) {
409 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_STR);
410 text.append (StaticStrings.SPACE_CHARACTER);
411 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
412 text.append (StaticStrings.SPACE_CHARACTER);
413 text.append (StaticStrings.TAB_CHARACTER);
414 text.append (StaticStrings.SPEECH_CHARACTER);
415 if(command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE).equals (StaticStrings.EXCLUDE_STR)) {
416 text.append (StaticStrings.EXCLAMATION_CHARACTER);
417 }
418 text.append (command_element.getAttribute (StaticStrings.CONTENT_ATTRIBUTE));
419 text.append (StaticStrings.SEPARATOR_CHARACTER);
420 text.append (XMLTools.getValue (command_element));
421 text.append (StaticStrings.SEPARATOR_CHARACTER);
422 String options_str = command_element.getAttribute (StaticStrings.OPTIONS_ATTRIBUTE);
423 if(options_str.length () > 0) {
424 text.append (options_str);
425 }
426 options_str = null;
427 text.append (StaticStrings.SPEECH_CHARACTER);
428 return text.toString ();
429 }
430
431 static private String subcollectionDefaultIndexToString (Element command_element) {
432 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_STR);
433 text.append (StaticStrings.TAB_CHARACTER);
434 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
435 int content_elements_length = content_elements.getLength ();
436 for(int j = 0; j < content_elements_length; j++) {
437 Element content_element = (Element) content_elements.item (j);
438 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
439 if(j < content_elements_length - 1) {
440 text.append (StaticStrings.COMMA_CHARACTER);
441 }
442 }
443 return text.toString ();
444 }
445
446 static private String subcollectionIndexesToString (Element command_element) {
447 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_INDEX_STR);
448 text.append (StaticStrings.TAB_CHARACTER);
449 // Retrieve all of the subcollection index partitions
450 NodeList subcollectionindex_elements = command_element.getElementsByTagName (StaticStrings.INDEX_ELEMENT);
451 int subcollectionindex_elements_length = subcollectionindex_elements.getLength ();
452 if(subcollectionindex_elements_length == 0) {
453 return null;
454 }
455 for(int j = 0; j < subcollectionindex_elements_length; j++) {
456 Element subcollectionindex_element = (Element) subcollectionindex_elements.item (j);
457 NodeList content_elements = subcollectionindex_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
458 int content_elements_length = content_elements.getLength ();
459 for(int k = 0; k < content_elements_length; k++) {
460 Element content_element = (Element) content_elements.item (k);
461 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
462 if(k < content_elements_length - 1) {
463 text.append (StaticStrings.COMMA_CHARACTER);
464 }
465 }
466 if(j < subcollectionindex_elements_length - 1) {
467 text.append (StaticStrings.SPACE_CHARACTER);
468 }
469 }
470 return text.toString ();
471 }
472
473 static private String supercollectionToString (Element command_element) {
474 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.COLLECTION_ELEMENT);
475 int content_elements_length = content_elements.getLength ();
476 if(content_elements_length > 1) {
477 StringBuffer text = new StringBuffer (StaticStrings.SUPERCOLLECTION_STR);
478 text.append (StaticStrings.TAB_CHARACTER);
479 for(int j = 0; j < content_elements_length; j++) {
480 Element content_element = (Element) content_elements.item (j);
481 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
482 if(j < content_elements_length - 1) {
483 text.append (StaticStrings.SPACE_CHARACTER);
484 }
485 }
486 return text.toString ();
487 }
488 return null;
489 }
490
491 static private String unknownToString (Element command_element) {
492 return XMLTools.getValue (command_element);
493 }
494
495
496 /** Parse a collect.cfg into a DOM model representation.
497 * note we are ignoring 2.39 compatibility now. */
498 static public String parse (File collect_cfg_file, Document document) {
499 // hack for pre 2.71 compatibility - we need to add in a
500 // build type if there is not one there
501 boolean search_types_parsed = false;
502 boolean build_types_parsed = false;
503 try {
504 StringBuffer saved_collect_cfg_string_buffer = new StringBuffer ();
505
506 Element collect_cfg_element = document.getDocumentElement ();
507 // Read in the file one command at a time.
508 InputStream istream = new FileInputStream (collect_cfg_file);
509 Reader in_reader = new InputStreamReader (istream, CollectionConfiguration.ENCODING);
510 BufferedReader in = new BufferedReader (in_reader);
511 String command_str = null;
512 while((command_str = in.readLine ()) != null) {
513 saved_collect_cfg_string_buffer.append (command_str + "\n");
514
515 boolean append_element = true;
516 Element command_element = null;
517 // A command may be broken over several lines.
518 command_str = command_str.trim ();
519 boolean eof = false;
520 while(!eof && command_str.endsWith (StaticStrings.NEWLINE_CHARACTER)) {
521 String next_line = in.readLine ();
522 if(next_line != null) {
523 next_line = next_line.trim ();
524 if(next_line.length () > 0) {
525 // Remove the new line character
526 command_str = command_str.substring (0, command_str.lastIndexOf (StaticStrings.NEWLINE_CHARACTER));
527 // And append the next line, which due to the test above must be non-zero length
528 command_str = command_str + next_line;
529 }
530 next_line = null;
531 }
532 // If we've reached the end of the file theres nothing more we can do
533 else {
534 eof = true;
535 }
536 }
537 // If there is still a new line character, then we remove it and hope for the best
538 if(command_str.endsWith (StaticStrings.NEWLINE_CHARACTER)) {
539 command_str = command_str.substring (0, command_str.lastIndexOf (StaticStrings.NEWLINE_CHARACTER));
540 }
541 // Now we've either got a command to parse...
542 if(command_str.length () != 0) {
543 // Start trying to figure out what it is
544 //StringTokenizer tokenizer = new StringTokenizer(command_str);
545 // Instead of a standard string tokenizer I'm going to use the new version of CommandTokenizer, which is not only smart enough to correctly notice speech marks and correctly parse them out, but now also takes the input stream so it can rebuild tokens that stretch over several lines.
546 CommandTokenizer tokenizer = new CommandTokenizer (command_str, in);
547 String command_type = tokenizer.nextToken ().toLowerCase ();
548 // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
549 if(command_element == null && command_type.equals (StaticStrings.CLASSIFY_STR)) {
550 command_element = parseClassify (command_str, document);
551 }
552 if(command_element == null && command_type.equals (StaticStrings.FORMAT_STR)) {
553 command_element = parseFormat (tokenizer, document); // Revised to handle multiple lines
554 }
555 if(command_element == null && (command_type.equals (StaticStrings.INDEX_STR) || command_type.equals (StaticStrings.COMMENTED_INDEXES_STR))) {
556 command_element = parseIndex (command_str, document);
557 }
558 if(command_element == null && (command_type.equals (StaticStrings.INDEX_DEFAULT_STR) || command_type.equals (StaticStrings.COMMENTED_INDEX_DEFAULT_STR))) {
559
560 command_element = parseIndexDefault (command_str, document);
561 }
562 if(command_element == null && command_type.equals (StaticStrings.LANGUAGES_STR)) {
563 command_element = parseLanguage (command_str, document);
564 }
565 if(command_element == null && command_type.equals (StaticStrings.LANGUAGE_DEFAULT_STR)) {
566 command_element = parseLanguageDefault (command_str, document);
567 }
568 if (command_element == null && command_type.equals (StaticStrings.LANGUAGE_METADATA_STR)) {
569 command_element = parseLanguageMetadata (command_str, document);
570 }
571 if(command_element == null && command_type.equals (StaticStrings.LEVELS_STR)) {
572 command_element = parseIndexOptions (command_str, document, StaticStrings.LEVELS_STR, true);
573 }
574 if (command_element == null && command_type.equals (StaticStrings.COMMENTED_LEVELS_STR)) {
575 command_element = parseIndexOptions (command_str, document, StaticStrings.LEVELS_STR, false);
576 }
577 if(command_element == null && command_type.equals (StaticStrings.LEVEL_DEFAULT_STR)) {
578 command_element = parseIndexOptionDefault (command_str, document, StaticStrings.LEVEL_DEFAULT_STR, true);
579 }
580 if(command_element == null && command_type.equals (StaticStrings.COMMENTED_LEVEL_DEFAULT_STR)) {
581 command_element = parseIndexOptionDefault (command_str, document, StaticStrings.LEVEL_DEFAULT_STR, false);
582 }
583 if (command_element == null && command_type.equals (StaticStrings.INDEXOPTIONS_STR)) {
584 command_element = parseIndexOptions (command_str, document, StaticStrings.INDEXOPTIONS_STR, true);
585 }
586 if (command_element == null && command_type.equals (StaticStrings.COMMENTED_INDEXOPTIONS_STR)) {
587 command_element = parseIndexOptions (command_str, document, StaticStrings.INDEXOPTIONS_STR, false);
588 }
589 if(command_element == null && command_type.equals (StaticStrings.COLLECTIONMETADATA_STR)) {
590 command_element = parseMetadata (tokenizer, document); // Revised to handle multiple lines
591 }
592 if(command_element == null && (command_type.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR) || command_type.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR) || command_type.equals (StaticStrings.BUILDTYPE_STR) || command_type.equals (StaticStrings.DATABASETYPE_STR))) {
593 command_element = parseMetadataSpecial (command_str, document);
594 // pre 2.71 hack
595 if (command_type.equals (StaticStrings.BUILDTYPE_STR)) {
596 build_types_parsed = true;
597 }
598 }
599 if(command_element == null && command_type.equals (StaticStrings.PLUGIN_STR)) {
600 command_element = parsePlugin (command_str, document);
601 }
602 // leave here for backwards compatibility
603 if(command_element == null && command_type.equals (StaticStrings.SEARCHTYPE_STR)) {
604 command_element = parseSearchType (command_str, document);
605 // pre 2.71 hack
606 search_types_parsed = true;
607
608 }
609 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_STR)) {
610 command_element = parseSubCollection (command_str, document);
611 }
612 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_STR)) {
613 command_element = parseSubCollectionDefaultIndex (command_str, document);
614 }
615 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_INDEX_STR)) {
616 command_element = parseSubCollectionIndex (command_str, document);
617 }
618 if(command_element == null && (command_type.equals (StaticStrings.SUPERCOLLECTION_STR) || command_type.equals (StaticStrings.CCS_STR))) {
619 command_element = parseSuperCollection (command_str, document);
620 }
621 // Doesn't match any known type
622 command_type = null;
623 if(command_element == null) {
624 // No-one knows what to do with this command, so we create an Unknown command element
625 command_element = document.createElement (StaticStrings.UNKNOWN_ELEMENT);
626 XMLTools.setValue (command_element, command_str);
627 }
628 }
629 // Or an empty line to remember for later
630 else {
631 command_element = document.createElement (CollectionConfiguration.NEWLINE_ELEMENT);
632 }
633 // Now command element shouldn't be null so we append it to the collection config DOM, but only if we haven't been told not to add it
634 //if(append_element) {
635 collect_cfg_element.appendChild (command_element);
636 //}
637 }
638 if (!build_types_parsed) {
639 String buildtype_type = BuildTypeManager.BUILD_TYPE_MG;
640 if (search_types_parsed) {
641 buildtype_type = BuildTypeManager.BUILD_TYPE_MGPP;
642 }
643 Element command_element = parseMetadataSpecial (StaticStrings.BUILDTYPE_STR+" "+buildtype_type, document);
644 Node target_node = CollectionConfiguration.findInsertionPoint (command_element);
645 if(target_node != null) {
646 collect_cfg_element.insertBefore (command_element, target_node);
647 }
648 else {
649 collect_cfg_element.appendChild (command_element);
650 }
651
652 }
653 return saved_collect_cfg_string_buffer.toString();
654 }
655 catch(Exception exception) {
656 DebugStream.println ("Error in CollectionConfiguration.parse(java.io.File): " + exception);
657 DebugStream.printStackTrace (exception);
658 }
659
660 return null;
661 }
662
663
664 /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix.
665 * @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string
666 * @return a HashMap containing the arguments parsed
667 */
668 static public HashMap parseArguments (CommandTokenizer tokenizer) {
669 HashMap arguments = new HashMap ();
670 String name = null;
671 String value = null;
672 while(tokenizer.hasMoreTokens () || name != null) {
673 // First we retrieve a name if we need one.
674 if(name == null) {
675 name = tokenizer.nextToken ();
676 }
677 // Now we attempt to retrieve a value
678 if(tokenizer.hasMoreTokens ()) {
679 value = tokenizer.nextToken ();
680 // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop.
681 // The value is not a name if it contains a space character: it's a quoted value
682 if (value.startsWith(StaticStrings.MINUS_CHARACTER) && value.indexOf(StaticStrings.SPACE_CHARACTER) == -1) {
683 arguments.put (name, null);
684 name = value;
685 }
686 // Otherwise we have a typical name->value pair ready to go
687 else {
688 arguments.put (name, value);
689 name = null;
690 }
691 }
692 // Otherwise its a binary flag
693 else {
694 arguments.put (name, null);
695 name = null;
696 }
697 }
698 return arguments;
699 }
700
701 static private Element parseClassify (String command_str, Document document) {
702 Element command_element = null;
703 try {
704 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
705 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
706 if(tokenizer.countTokens () >= 2) { // Must support "classify Phind" (no args)
707 command_element = document.createElement (StaticStrings.CLASSIFY_ELEMENT);
708 // First token is classify
709 tokenizer.nextToken ();
710 // The next token is the classifier type
711 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, tokenizer.nextToken ());
712 // Now we parse out the remaining arguments into a hashmapping from name to value
713 HashMap arguments = parseArguments (tokenizer);
714 // Assign the arguments as Option elements
715 Iterator names = arguments.keySet ().iterator ();
716 while(names.hasNext ()) {
717 String name = (String) names.next ();
718 String value = (String) arguments.get (name); // Can be null
719 Element option_element = document.createElement (StaticStrings.OPTION_ELEMENT);
720 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name.substring (1));
721 if(value != null) {
722 // Remove any speech marks appended in strings containing whitespace
723 if(value.startsWith (StaticStrings.SPEECH_CHARACTER) && value.endsWith (StaticStrings.SPEECH_CHARACTER)) {
724 value = value.substring (1, value.length () - 1);
725 }
726 XMLTools.setValue (option_element, value);
727 }
728 option_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
729 command_element.appendChild (option_element);
730 option_element = null;
731 name = null;
732 value = null;
733 }
734 names = null;
735 arguments = null;
736 }
737 tokenizer = null;
738 }
739 catch(Exception error) {
740 }
741 return command_element;
742 }
743
744 static private Element parseFormat (CommandTokenizer tokenizer, Document document) {
745 Element command_element = null;
746 try {
747 command_element = document.createElement (StaticStrings.FORMAT_ELEMENT);
748 String name_str = tokenizer.nextToken ();
749 String value_str = tokenizer.nextToken ();
750 if(name_str != null && value_str != null) {
751 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
752 // If the value is true or false we add it as an attribute
753 if(value_str.equalsIgnoreCase (StaticStrings.TRUE_STR) || value_str.equalsIgnoreCase (StaticStrings.FALSE_STR)) {
754 command_element.setAttribute (StaticStrings.VALUE_ATTRIBUTE, value_str.toLowerCase ());
755 }
756 // Otherwise it gets added as a text node
757 else {
758 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
759 value_str = Codec.transform (value_str, Codec.GREENSTONE_TO_DOM);
760 XMLTools.setValue (command_element, value_str);
761 }
762 }
763 else {
764 command_element = null;
765 }
766 name_str = null;
767 value_str = null;
768 }
769 catch (Exception exception) {
770 DebugStream.printStackTrace (exception);
771 command_element = null;
772 }
773 return command_element;
774 }
775
776 static private Element parseIndex (String command_str, Document document) {
777 Element command_element = null;
778 try {
779 StringTokenizer tokenizer = new StringTokenizer (command_str);
780 String command = tokenizer.nextToken ();
781 command_element = document.createElement (StaticStrings.INDEXES_ELEMENT);
782 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (command.equals (StaticStrings.INDEX_STR) ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
783 command = null;
784 if(!tokenizer.hasMoreTokens ()) {
785
786 // there are no indexes
787 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.FALSE_STR);
788 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.FALSE_STR); // for now
789 tokenizer = null;
790 return command_element;
791 }
792
793 while(tokenizer.hasMoreTokens ()) {
794 Element index_element = document.createElement (StaticStrings.INDEX_ELEMENT);
795 String index_str = tokenizer.nextToken ();
796 // There are two types of index we have to consider. MG versions use "level:source,source" while MGPP versions use "source,source source"
797 if(index_str.indexOf (StaticStrings.COLON_CHARACTER) != -1) {
798 index_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE, index_str.substring (0, index_str.indexOf (StaticStrings.COLON_CHARACTER)));
799 index_str = index_str.substring (index_str.indexOf (StaticStrings.COLON_CHARACTER) + 1);
800 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.FALSE_STR);
801 }
802 else {
803 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.TRUE_STR);
804 }
805 StringTokenizer content_tokenizer = new StringTokenizer (index_str, StaticStrings.COMMA_CHARACTER);
806 while(content_tokenizer.hasMoreTokens ()) {
807 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
808 String content_str = content_tokenizer.nextToken ();
809 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
810 if(content_str.indexOf (StaticStrings.NS_SEP) == -1) {
811 if(content_str.equals (StaticStrings.TEXT_STR) || content_str.equals (StaticStrings.ALLFIELDS_STR) || content_str.equals(StaticStrings.METADATA_STR)) {
812 // Our special strings are OK.
813 }
814 else {
815 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
816 }
817 }
818 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_str);
819 index_element.appendChild (content_element);
820 content_element = null;
821 }
822 content_tokenizer = null;
823 index_str = null;
824 command_element.appendChild (index_element);
825 index_element = null;
826 }
827 tokenizer = null;
828 }
829 catch (Exception exception) {
830 exception.printStackTrace ();
831 }
832 return command_element;
833 }
834
835 static private Element parseIndexDefault (String command_str, Document document) {
836 Element command_element = null;
837 try {
838 StringTokenizer tokenizer = new StringTokenizer (command_str);
839 if(tokenizer.countTokens () >= 2) {
840 command_element = document.createElement (StaticStrings.INDEX_DEFAULT_ELEMENT);
841 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken ()).equals (StaticStrings.INDEX_DEFAULT_STR) ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
842 String index_str = tokenizer.nextToken ();
843 String level="";
844 if (index_str.indexOf (StaticStrings.COLON_CHARACTER) !=-1) {
845 level = index_str.substring (0, index_str.indexOf (StaticStrings.COLON_CHARACTER));
846 }
847
848 command_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE,level);
849
850 String content_str = index_str;
851
852 if (index_str.indexOf (StaticStrings.COLON_CHARACTER) !=-1) {
853 content_str = index_str.substring (index_str.indexOf (StaticStrings.COLON_CHARACTER) + 1);
854 }
855
856 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
857 while(content_tokenizer.hasMoreTokens ()) {
858 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
859 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
860 command_element.appendChild (content_element);
861 content_element = null;
862 }
863 content_tokenizer = null;
864 content_str = null;
865 content_str = null;
866 index_str = null;
867 }
868 tokenizer = null;
869 }
870 catch (Exception exception) {
871 }
872 return command_element;
873 }
874
875 static private Element parseLanguage (String command_str, Document document) {
876 Element command_element = null;
877 try {
878 StringTokenizer tokenizer = new StringTokenizer (command_str);
879 tokenizer.nextToken ();
880 if(tokenizer.hasMoreTokens ()) {
881 command_element = document.createElement (StaticStrings.LANGUAGES_ELEMENT);
882 while(tokenizer.hasMoreTokens ()) {
883 Element language_element = document.createElement (StaticStrings.LANGUAGE_ELEMENT);
884 language_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
885 command_element.appendChild (language_element);
886 language_element = null;
887 }
888 }
889 tokenizer = null;
890 }
891 catch (Exception exception) {
892 }
893 return command_element;
894 }
895
896 static private Element parseLanguageDefault (String command_str, Document document) {
897 Element command_element = null;
898 try {
899 StringTokenizer tokenizer = new StringTokenizer (command_str);
900 if(tokenizer.countTokens () >= 2) {
901 command_element = document.createElement (StaticStrings.LANGUAGE_DEFAULT_ELEMENT);
902 tokenizer.nextToken ();
903 String default_language_str = tokenizer.nextToken ();
904 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, default_language_str);
905 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
906 default_language_str = null;
907 }
908 tokenizer = null;
909 }
910 catch (Exception exception) {
911 }
912 return command_element;
913 }
914
915 static private Element parseLanguageMetadata (String command_str, Document document) {
916 Element command_element = null;
917 try {
918 StringTokenizer tokenizer = new StringTokenizer (command_str);
919 if(tokenizer.countTokens () >= 2) {
920 command_element = document.createElement (StaticStrings.LANGUAGE_METADATA_ELEMENT);
921 tokenizer.nextToken ();
922 String language_metadata_str = tokenizer.nextToken ();
923 if (language_metadata_str.indexOf (StaticStrings.NS_SEP) == -1) {
924 language_metadata_str = StaticStrings.EXTRACTED_NAMESPACE + language_metadata_str;
925 }
926 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, language_metadata_str);
927 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
928 language_metadata_str = null;
929 }
930 tokenizer = null;
931
932 }
933 catch (Exception exception) {
934 }
935 return command_element;
936 }
937
938 static private Element parseIndexOptions (String command_str, Document document, String type, boolean assigned) {
939 Element command_element = null;
940 try {
941 StringTokenizer tokenizer = new StringTokenizer (command_str);
942 // First token is command type
943 String command = tokenizer.nextToken ();
944 if(tokenizer.hasMoreTokens ()) {
945 command_element = document.createElement (StaticStrings.INDEXOPTIONS_ELEMENT);
946 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE,type);
947 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (assigned ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
948 while(tokenizer.hasMoreTokens ()) {
949 Element option_element = document.createElement (StaticStrings.INDEXOPTION_ELEMENT);
950 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
951 command_element.appendChild (option_element);
952 option_element = null;
953 }
954 }
955 command = null;
956 }
957 catch(Exception exception) {
958 }
959 return command_element;
960 }
961
962 static private Element parseIndexOptionDefault (String command_str, Document document, String type, boolean assigned) {
963 Element command_element = null;
964 try {
965 StringTokenizer tokenizer = new StringTokenizer (command_str);
966 // First token is command type
967 String command = tokenizer.nextToken ();
968 if(tokenizer.hasMoreTokens ()) {
969 command_element = document.createElement (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT);
970 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (assigned ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR)); // is it commented out or not?
971 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, type);
972 command_element.setAttribute (StaticStrings.VALUE_ATTRIBUTE, tokenizer.nextToken ());
973 }
974
975 tokenizer = null;
976 }
977 catch (Exception exception) {
978 }
979 return command_element;
980 }
981
982 static private Element parseMetadata (CommandTokenizer tokenizer, Document document) {
983 Element command_element = null;
984 try {
985 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_ELEMENT);
986 String name_str = tokenizer.nextToken ();
987 String value_str = tokenizer.nextToken ();
988 if(name_str != null && value_str != null) {
989 String language_str = Configuration.getLanguage ();
990 // Check if the value string is actually a language string
991 if(value_str.startsWith (StaticStrings.LBRACKET_CHARACTER) && value_str.endsWith (StaticStrings.RBRACKET_CHARACTER)) {
992 language_str = value_str.substring (value_str.indexOf (StaticStrings.LANGUAGE_ARGUMENT) + 2, value_str.length () - 1);
993 value_str = tokenizer.nextToken ();
994 }
995 if(value_str != null) {
996 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
997 value_str = Codec.transform (value_str, Codec.GREENSTONE_TO_DOM);
998 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
999 command_element.setAttribute (StaticStrings.LANGUAGE_ATTRIBUTE, language_str);
1000 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1001 XMLTools.setValue (command_element, value_str);
1002 }
1003 else {
1004 command_element = null;
1005 }
1006 language_str = null;
1007 }
1008 else {
1009 command_element = null;
1010 }
1011 name_str = null;
1012 value_str = null;
1013 }
1014 catch (Exception exception) {
1015 DebugStream.printStackTrace (exception);
1016 command_element = null;
1017 }
1018 return command_element;
1019 }
1020
1021 static private Element parseMetadataSpecial (String command_str, Document document) {
1022 Element command_element = null;
1023 try {
1024 StringTokenizer tokenizer = new StringTokenizer (command_str);
1025 if(tokenizer.countTokens () >= 2) {
1026 String name_str = tokenizer.nextToken ();
1027 String value_str = tokenizer.nextToken ();
1028 if (name_str.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR)) {
1029 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT);
1030 }
1031 else if(name_str.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR)) {
1032 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT);
1033 }
1034 else if(name_str.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR)) {
1035 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT);
1036 }
1037 else if (name_str.equals (StaticStrings.BUILDTYPE_STR)) {
1038 command_element = document.createElement (StaticStrings.BUILDTYPE_ELEMENT);
1039 }
1040 else if (name_str.equals (StaticStrings.DATABASETYPE_STR)) {
1041 command_element = document.createElement (StaticStrings.DATABASETYPE_ELEMENT);
1042 }
1043 if(command_element != null) {
1044 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
1045 command_element.setAttribute (StaticStrings.LANGUAGE_ATTRIBUTE, StaticStrings.ENGLISH_LANGUAGE_STR);
1046 command_element.setAttribute (StaticStrings.SPECIAL_ATTRIBUTE, StaticStrings.TRUE_STR);
1047 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1048 if(value_str.startsWith (StaticStrings.SPEECH_CHARACTER) && value_str.endsWith (StaticStrings.SPEECH_CHARACTER)) {
1049 value_str = value_str.substring (1, value_str.length () - 1);
1050 }
1051 XMLTools.setValue (command_element, value_str);
1052 }
1053 value_str = null;
1054 name_str = null;
1055 }
1056 tokenizer = null;
1057 }
1058 catch (Exception exception) {
1059 }
1060 return command_element;
1061 }
1062
1063 static private Element parsePlugin (String command_str, Document document) {
1064 Element command_element = null;
1065 try {
1066 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
1067 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1068 if(tokenizer.countTokens () >= 2) {
1069 command_element = document.createElement (StaticStrings.PLUGIN_ELEMENT);
1070 // First token is plugin
1071 tokenizer.nextToken ();
1072 // The next token is the type
1073 String type = tokenizer.nextToken ();
1074 type = Utility.ensureNewPluginName(type);
1075 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, type);
1076 // Now we parse out the remaining arguments into a hashmapping from name to value
1077 HashMap arguments = parseArguments (tokenizer);
1078 // also watch out for the deprecated -use_metadata_files option to RecPlug and remove it
1079 Iterator names = arguments.keySet ().iterator ();
1080 while(names.hasNext ()) {
1081 String name = (String) names.next ();
1082 String value = (String) arguments.get (name); // Can be null
1083
1084 if(type.equals (StaticStrings.RECPLUG_STR) && name.substring (1).equals (StaticStrings.USE_METADATA_FILES_ARGUMENT)) {
1085 continue; // ignore this option
1086 }
1087 Element option_element = document.createElement (StaticStrings.OPTION_ELEMENT);
1088 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name.substring (1));
1089 option_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1090 if(value != null) {
1091 // Remove any speech marks appended in strings containing whitespace
1092 if(value.startsWith (StaticStrings.SPEECH_CHARACTER) && value.endsWith (StaticStrings.SPEECH_CHARACTER)) {
1093 value = value.substring (1, value.length () - 1);
1094 }
1095 XMLTools.setValue (option_element, value);
1096 }
1097 command_element.appendChild (option_element);
1098 option_element = null;
1099 name = null;
1100 value = null;
1101 }
1102
1103 type = null;
1104 names = null;
1105 arguments = null;
1106 }
1107 tokenizer = null;
1108 }
1109 catch(Exception exception) {
1110 // This catch clause had been left empty. If this is deliberate then
1111 // we should have a comment here explaining why there is no need to
1112 // print anything out. Am assuming this is mistake for now, and
1113 // have added in a call to printStackTrace()
1114 System.err.println("Malformed plugin statement");
1115 exception.printStackTrace();
1116 }
1117 return command_element;
1118 }
1119
1120 /* search types are now handled as formats - leave this here to convert in case we have an old config file */
1121 static private Element parseSearchType (String command_str, Document document) {
1122 Element command_element = null;
1123 try {
1124 StringTokenizer tokenizer = new StringTokenizer (command_str);
1125 // First token is command type (searchtype)
1126 tokenizer.nextToken ();
1127 if(tokenizer.hasMoreTokens ()) {
1128 command_element = document.createElement (StaticStrings.FORMAT_ELEMENT);
1129 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, "SearchTypes");
1130 String value = tokenizer.nextToken ();
1131 while(tokenizer.hasMoreTokens ()) {
1132 value += ","+tokenizer.nextToken ();
1133 }
1134 value = Codec.transform (value, Codec.GREENSTONE_TO_DOM);
1135 XMLTools.setValue (command_element, value);
1136 }
1137 }
1138 catch(Exception exception) {
1139 }
1140 return command_element;
1141 }
1142
1143 static private Element parseSubCollection (String command_str, Document document) {
1144 Element command_element = null;
1145 try {
1146 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
1147 if(tokenizer.countTokens () >= 3) {
1148 command_element = document.createElement (StaticStrings.SUBCOLLECTION_ELEMENT);
1149 // First token is command type
1150 tokenizer.nextToken ();
1151 // Then subcollection identifier
1152 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
1153 // Then finally the pattern used to build the subcollection partition
1154 String full_pattern_str = tokenizer.nextToken ();
1155 // Set inclusion/exclusion flag and remove any exclamation mark
1156 boolean exclusion = full_pattern_str.startsWith (StaticStrings.EXCLAMATION_CHARACTER);
1157 if (exclusion) {
1158 full_pattern_str = full_pattern_str.substring (1, full_pattern_str.length ());
1159 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, StaticStrings.EXCLUDE_STR);
1160 }
1161 else {
1162 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, StaticStrings.INCLUDE_STR);
1163 }
1164
1165 // Let's make sure it is a valid Greenstone configuration line
1166 String[] results = full_pattern_str.split("\\" + StaticStrings.SEPARATOR_CHARACTER, 3);
1167
1168 if (results.length >= 2) {
1169 String content_str = results[0];
1170 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1171 if (!content_str.equals (StaticStrings.FILENAME_STR) && content_str.indexOf (StaticStrings.NS_SEP) == -1) {
1172 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
1173 }
1174 command_element.setAttribute (StaticStrings.CONTENT_ATTRIBUTE, content_str);
1175 XMLTools.setValue (command_element, results[1]);
1176 if (results.length >= 3) {
1177 command_element.setAttribute (StaticStrings.OPTIONS_ATTRIBUTE, results[2]);
1178 }
1179 }
1180 }
1181 }
1182 catch(Exception exception) {
1183 exception.printStackTrace ();
1184 }
1185 return command_element;
1186 }
1187
1188 static private Element parseSubCollectionDefaultIndex (String command_str, Document document) {
1189 Element command_element = null;
1190 try {
1191 StringTokenizer tokenizer = new StringTokenizer (command_str);
1192 if(tokenizer.countTokens () == 2) {
1193 command_element = document.createElement (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT);
1194 tokenizer.nextToken ();
1195 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1196 String content_str = tokenizer.nextToken ();
1197 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
1198 while(content_tokenizer.hasMoreTokens ()) {
1199 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
1200 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
1201 command_element.appendChild (content_element);
1202 content_element = null;
1203 }
1204 content_tokenizer = null;
1205 content_str = null;
1206 }
1207 tokenizer = null;
1208 }
1209 catch(Exception exception) {
1210 }
1211 return command_element;
1212 }
1213
1214 static private Element parseSubCollectionIndex (String command_str, Document document) {
1215 Element command_element = null;
1216 try {
1217 StringTokenizer tokenizer = new StringTokenizer (command_str);
1218 tokenizer.nextToken ();
1219 if(tokenizer.hasMoreTokens ()) {
1220 command_element = document.createElement (StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT);
1221 }
1222 while(tokenizer.hasMoreTokens ()) {
1223 Element subcollectionindex_element = document.createElement (StaticStrings.INDEX_ELEMENT);
1224 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1225 String content_str = tokenizer.nextToken ();
1226 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
1227 while(content_tokenizer.hasMoreTokens ()) {
1228 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
1229 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
1230 subcollectionindex_element.appendChild (content_element);
1231 content_element = null;
1232 }
1233 content_tokenizer = null;
1234 content_str = null;
1235 command_element.appendChild (subcollectionindex_element);
1236 subcollectionindex_element = null;
1237 }
1238 tokenizer = null;
1239 }
1240 catch (Exception exception) {
1241 }
1242 return command_element;
1243 }
1244
1245 static private Element parseSuperCollection (String command_str, Document document) {
1246 Element command_element = null;
1247 try {
1248 StringTokenizer tokenizer = new StringTokenizer (command_str);
1249 if(tokenizer.countTokens () >= 3) {
1250 command_element = document.createElement (StaticStrings.SUPERCOLLECTION_ELEMENT);
1251 tokenizer.nextToken ();
1252 while(tokenizer.hasMoreTokens ()) {
1253 Element collection_element = document.createElement (StaticStrings.COLLECTION_ELEMENT);
1254 collection_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
1255 command_element.appendChild (collection_element);
1256 collection_element = null;
1257 }
1258 }
1259 tokenizer = null;
1260 }
1261 catch(Exception exception) {
1262 }
1263 return command_element;
1264 }
1265
1266 static private String pluginToString (Element command_element) {
1267 if(command_element.getAttribute (StaticStrings.SEPARATOR_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
1268 return "";
1269 }
1270 StringBuffer text = new StringBuffer (StaticStrings.PLUGIN_STR);
1271 text.append (StaticStrings.TAB_CHARACTER);
1272 text.append (command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE));
1273 // Retrieve, and output, the arguments
1274 NodeList option_elements = command_element.getElementsByTagName (StaticStrings.OPTION_ELEMENT);
1275 int option_elements_length = option_elements.getLength ();
1276 if(option_elements_length > 0) {
1277 for(int j = 0; j < option_elements_length; j++) {
1278 Element option_element = (Element) option_elements.item (j);
1279 if(option_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
1280 text.append (StaticStrings.SPACE_CHARACTER);
1281 text.append (StaticStrings.MINUS_CHARACTER);
1282 text.append (option_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
1283 String value_str = XMLTools.getValue (option_element);
1284 if (value_str.length () > 0) {
1285 text.append (StaticStrings.SPACE_CHARACTER);
1286 if(value_str.indexOf (StaticStrings.SPACE_CHARACTER) != -1) {
1287 // enclose in quotes
1288 text.append(StaticStrings.SPEECH_CHARACTER);
1289 text.append(value_str);
1290 text.append(StaticStrings.SPEECH_CHARACTER);
1291 } else {
1292
1293 text.append(value_str);
1294 }
1295 }
1296
1297 value_str = null;
1298 }
1299 option_element = null;
1300 }
1301 }
1302 option_elements = null;
1303
1304 return text.toString ();
1305 }
1306
1307 static public String generateStringVersion(Document document) {
1308
1309 StringBuffer collect_cfg_string_buffer = new StringBuffer ();
1310 NodeList command_elements = document.getDocumentElement ().getChildNodes ();
1311 boolean just_wrote_blank_line = false; // Prevent two or more blank lines in a row
1312 for (int i = 0; i < command_elements.getLength (); i++) {
1313 Node command_node = command_elements.item (i);
1314 if (!(command_node instanceof Element)) {
1315 // We're only interested in Elements
1316 continue;
1317 }
1318 Element command_element = (Element) command_node;
1319
1320 // Handle NewLine elements (blank lines)
1321 if (command_element.getNodeName ().equals (CollectionConfiguration.NEWLINE_ELEMENT) && !just_wrote_blank_line) {
1322 collect_cfg_string_buffer.append ("\n");
1323 just_wrote_blank_line = true;
1324 }
1325
1326 // Anything else we write to file, but only if it has been assigned, except for index and level commands
1327 // (which just get commented out if unassigned -- a side effect of MG & MGPP compatibility)
1328 else if (!command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR) || command_element.getNodeName ().equals (StaticStrings.INDEXES_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEX_DEFAULT_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEXOPTIONS_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT)) {
1329 String command = toString(command_element);
1330
1331 if (command != null && command.length ()> 0 ) {
1332 collect_cfg_string_buffer.append (command + "\n");
1333 just_wrote_blank_line = false;
1334 }
1335 }
1336 }
1337
1338 return collect_cfg_string_buffer.toString ();
1339 }
1340
1341
1342}
Note: See TracBrowser for help on using the repository browser.