source: other-projects/FileTransfer-WebSocketPair/testGXTWithGreenstone/src/org/greenstone/gatherer/cdm/CollectCfgReadWrite.java@ 33053

Last change on this file since 33053 was 33053, checked in by ak19, 5 years ago

I still had some stuff of Nathan Kelly's (FileTransfer-WebSocketPair) sitting on my USB. Had already committed the Themes folder at the time, 2 years back. Not sure if he wanted this additional folder committed. But I didn't want to delete it and decided it will be better off on SVN. When we use his project, if we find we didn't need this test folder, we can remove it from svn then.

File size: 57.7 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Methods to read collect.cfg files into internal XML form, and write
9 * them back out again.
10 *
11 * Copyright (C) 1999 New Zealand Digital Library Project
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 *########################################################################
27 */
28package org.greenstone.gatherer.cdm;
29
30import java.io.BufferedReader;
31import java.io.File;
32import java.io.FileInputStream;
33import java.io.InputStream;
34import java.io.InputStreamReader;
35import java.io.Reader;
36
37import java.util.HashMap;
38import java.util.Iterator;
39import java.util.StringTokenizer;
40
41import org.greenstone.gatherer.DebugStream;
42import org.greenstone.gatherer.Configuration;
43import org.greenstone.gatherer.metadata.MetadataElement;
44import org.greenstone.gatherer.metadata.MetadataTools;
45import org.greenstone.gatherer.util.Codec;
46import org.greenstone.gatherer.util.XMLTools;
47import org.greenstone.gatherer.util.StaticStrings;
48import org.greenstone.gatherer.util.Utility;
49
50import org.w3c.dom.*;
51
52public class CollectCfgReadWrite {
53
54
55 static public String toString (Element command_element) {
56 String command_element_name = command_element.getNodeName ();
57 if(command_element_name.equals (StaticStrings.CLASSIFY_ELEMENT)) {
58 return classifyToString (command_element);
59 }
60 else if(command_element_name.equals (StaticStrings.FORMAT_ELEMENT)) {
61 return formatToString (command_element);
62 }
63 else if(command_element_name.equals (StaticStrings.INDEXES_ELEMENT)) {
64 return indexesToString (command_element);
65 }
66 else if(command_element_name.equals (StaticStrings.INDEX_DEFAULT_ELEMENT)) {
67 return indexDefaultToString (command_element);
68 }
69 else if(command_element_name.equals (StaticStrings.LANGUAGES_ELEMENT)) {
70 return languagesToString (command_element);
71 }
72 else if(command_element_name.equals (StaticStrings.LANGUAGE_DEFAULT_ELEMENT)) {
73 return languageDefaultToString (command_element);
74 }
75 else if (command_element_name.equals (StaticStrings.LANGUAGE_METADATA_ELEMENT)) {
76 return languageMetadataToString (command_element);
77 }
78 else if(command_element_name.equals (StaticStrings.INDEXOPTIONS_ELEMENT)) {
79 return indexOptionsToString (command_element);
80 }
81 else if(command_element_name.equals (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT)) {
82 return indexOptionDefaultToString (command_element);
83 }
84 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_ELEMENT)) {
85 return metadataToString (command_element);
86 }
87 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT)) {
88 return metadataToString (command_element);
89 }
90 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT)) {
91 return metadataToString (command_element);
92 }
93 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT)) {
94 return metadataToString (command_element);
95 }
96 else if (command_element_name.equals (StaticStrings.BUILDTYPE_ELEMENT)) {
97 return metadataToString (command_element);
98 }
99 else if (command_element_name.equals (StaticStrings.DATABASETYPE_ELEMENT)) {
100 return metadataToString (command_element);
101 }
102 else if(command_element_name.equals (StaticStrings.PLUGIN_ELEMENT)) {
103 return pluginToString (command_element);
104 }
105 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_ELEMENT)) {
106 return subcollectionToString (command_element);
107 }
108 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) {
109 return subcollectionDefaultIndexToString (command_element);
110 }
111 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT)) {
112 return subcollectionIndexesToString (command_element);
113 }
114 else if(command_element_name.equals (StaticStrings.SUPERCOLLECTION_ELEMENT)) {
115 return supercollectionToString (command_element);
116 }
117 else if(command_element_name.equals (StaticStrings.UNKNOWN_ELEMENT)) {
118 return unknownToString (command_element);
119 }
120 return "";
121 }
122
123 static private String classifyToString (Element command_element) {
124 StringBuffer text = new StringBuffer (StaticStrings.CLASSIFY_STR);
125 text.append (StaticStrings.TAB_CHARACTER);
126 text.append (command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE));
127 NodeList option_elements = command_element.getElementsByTagName (StaticStrings.OPTION_ELEMENT);
128 int option_elements_length = option_elements.getLength ();
129 for(int j = 0; j < option_elements_length; j++) {
130 Element option_element = (Element) option_elements.item (j);
131 if(option_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
132 text.append (StaticStrings.SPACE_CHARACTER);
133 text.append (StaticStrings.MINUS_CHARACTER);
134 text.append (option_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
135 String value_str = XMLTools.getValue (option_element);
136
137 if (value_str.length () > 0) {
138 text.append (StaticStrings.SPACE_CHARACTER);
139 if(value_str.indexOf (StaticStrings.SPACE_CHARACTER) != -1) {
140 // enclose in quotes
141 text.append(StaticStrings.SPEECH_CHARACTER);
142 text.append(value_str);
143 text.append(StaticStrings.SPEECH_CHARACTER);
144 } else {
145
146 text.append(value_str);
147 }
148 }
149
150 value_str = null;
151 }
152 option_element = null;
153 }
154 option_elements = null;
155 return text.toString ();
156 }
157
158 static private String formatToString (Element command_element) {
159 StringBuffer text = new StringBuffer (StaticStrings.FORMAT_STR);
160 text.append (StaticStrings.SPACE_CHARACTER);
161 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
162 text.append (StaticStrings.SPACE_CHARACTER);
163 String value_str = command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE);
164 if(value_str.length () != 0) {
165 text.append (value_str);
166 }
167 else {
168 // Remember to encode format string to Greenstone specification
169 value_str = Codec.transform (XMLTools.getValue (command_element), Codec.DOM_TO_GREENSTONE);
170 text.append (StaticStrings.SPEECH_CHARACTER);
171 text.append (value_str);
172 text.append (StaticStrings.SPEECH_CHARACTER);
173 }
174 value_str = null;
175 return text.toString ();
176 }
177
178 static private String indexesToString (Element command_element) {
179 boolean comment_only = false;
180 StringBuffer text = new StringBuffer ("");
181 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
182 text.append ("#");
183 comment_only = true;
184 }
185 text.append (StaticStrings.INDEX_STR);
186 text.append (StaticStrings.TAB_CHARACTER);
187 if(!comment_only) {
188 text.append (StaticStrings.TAB_CHARACTER);
189 }
190 NodeList index_elements = command_element.getElementsByTagName (StaticStrings.INDEX_ELEMENT);
191 if (index_elements.getLength () == 0) { // no indexes
192 return "";
193 }
194 // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
195 int index_elements_length = index_elements.getLength ();
196 for(int j = 0; j < index_elements_length; j++) {
197 Element index_element = (Element) index_elements.item (j);
198 String level_str = index_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE);
199 if(level_str.length () > 0) {
200 text.append (level_str);
201 text.append (StaticStrings.COLON_CHARACTER);
202 }
203 NodeList content_elements = index_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
204 int content_elements_length = content_elements.getLength ();
205 // Don't output anything if no indexes are set
206 if(content_elements_length == 0) {
207 return null;
208 }
209 for(int k = 0; k < content_elements_length; k++) {
210 Element content_element = (Element) content_elements.item (k);
211 String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
212 text.append (name_str);
213 name_str = null;
214 if(k < content_elements_length - 1) {
215 text.append (StaticStrings.COMMA_CHARACTER);
216 }
217 content_element = null;
218 }
219 if(j < index_elements_length - 1) {
220 text.append (StaticStrings.SPACE_CHARACTER);
221 }
222 content_elements = null;
223 index_element = null;
224 }
225 index_elements = null;
226 return text.toString ();
227 }
228
229 static private String indexDefaultToString (Element command_element) {
230 StringBuffer text = new StringBuffer ("");
231 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
232 text.append ("#");
233 }
234 text.append (StaticStrings.INDEX_DEFAULT_STR);
235 text.append (StaticStrings.TAB_CHARACTER);
236 if (!command_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE).equals ("")) {
237 text.append (command_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE));
238 text.append (StaticStrings.COLON_CHARACTER);
239 }
240 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
241 int content_elements_length = content_elements.getLength ();
242 for(int j = 0; j < content_elements_length; j++) {
243 Element content_element = (Element) content_elements.item (j);
244 String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
245 text.append (name_str);
246 name_str = null;
247 if(j < content_elements_length - 1) {
248 text.append (StaticStrings.COMMA_CHARACTER);
249 }
250 content_element = null;
251 }
252 content_elements = null;
253 return text.toString ();
254 }
255
256 static private String languagesToString (Element command_element) {
257 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGES_STR);
258 text.append (StaticStrings.TAB_CHARACTER);
259 // Retrieve all the languages and write them out in a space separated list
260 NodeList language_elements = command_element.getElementsByTagName (StaticStrings.LANGUAGE_ELEMENT);
261 int language_elements_length = language_elements.getLength ();
262 if(language_elements_length == 0) {
263 return null;
264 }
265 for(int j = 0; j < language_elements_length; j++) {
266 Element language_element = (Element) language_elements.item (j);
267 text.append (language_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
268 if(j < language_elements_length - 1) {
269 text.append (StaticStrings.SPACE_CHARACTER);
270 }
271 }
272 return text.toString ();
273 }
274
275 static private String languageDefaultToString (Element command_element) {
276 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGE_DEFAULT_STR);
277 text.append (StaticStrings.TAB_CHARACTER);
278 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
279 return text.toString ();
280 }
281
282 static private String languageMetadataToString (Element command_element) {
283 if (!command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
284 return "";
285 }
286 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGE_METADATA_STR);
287 text.append (StaticStrings.TAB_CHARACTER);
288 String name_str = command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
289 text.append (name_str);
290 return text.toString ();
291 }
292
293 static private String indexOptionsToString (Element command_element) {
294 StringBuffer text = new StringBuffer ("");
295 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
296 text.append ("#");
297 }
298 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
299 text.append (StaticStrings.TAB_CHARACTER);
300 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.INDEXOPTION_ELEMENT);
301 int content_elements_length = content_elements.getLength ();
302 // Don't output anything if no options are set.
303 if(content_elements_length == 0) {
304 return null;
305 }
306 for(int i = 0; i < content_elements_length; i++) {
307 Element content_element = (Element) content_elements.item (i);
308 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
309 text.append (StaticStrings.SPACE_CHARACTER);
310 }
311 return text.substring (0, text.length () - 1);
312 }
313
314 static private String indexOptionDefaultToString (Element command_element) {
315 // Don't bother if there is no value
316 if (command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE).equals ("")) {
317 return "";
318 }
319 StringBuffer text = new StringBuffer ("");
320 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
321 text.append ("#");
322 }
323 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
324 text.append (StaticStrings.TAB_CHARACTER);
325 text.append (command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE));
326 return text.toString ();
327 }
328
329 static private String metadataToString (Element command_element) {
330 // lets first check the value - if its empty, don't bother sticking it in the config file
331 String value_str = XMLTools.getValue (command_element);
332 if (value_str.equals ("")) {
333 return "";
334 }
335 boolean special = false;
336
337 StringBuffer text = new StringBuffer ("");
338 String name_str = command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
339 // If the name is one of the special four, we don't write the collectionmeta first. Note maintainer and buildtype are singled out for 'prittying' reasons.
340 if(name_str.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR)|| name_str.equals (StaticStrings.BUILDTYPE_STR) || name_str.equals (StaticStrings.DATABASETYPE_STR)) {
341 text.append (name_str);
342 text.append (StaticStrings.TAB_CHARACTER);
343 special = true;
344 }
345 else if (name_str.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR) || name_str.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR) ) {
346 text.append (name_str);
347 text.append (StaticStrings.TAB_CHARACTER);
348 text.append (StaticStrings.TAB_CHARACTER);
349 special = true;
350 }
351 else {
352 text.append (StaticStrings.COLLECTIONMETADATA_STR);
353 text.append (StaticStrings.TAB_CHARACTER);
354 text.append (name_str);
355 text.append (StaticStrings.SPACE_CHARACTER);
356 String language_str = command_element.getAttribute (StaticStrings.LANGUAGE_ATTRIBUTE);
357 text.append (StaticStrings.LBRACKET_CHARACTER);
358 text.append (StaticStrings.LANGUAGE_ARGUMENT);
359 text.append (language_str);
360 text.append (StaticStrings.RBRACKET_CHARACTER);
361 text.append (StaticStrings.SPACE_CHARACTER);
362 }
363 name_str = null;
364
365 // decode the value from XML to a form for config file
366 value_str = Codec.transform (value_str, Codec.DOM_TO_GREENSTONE);
367
368 // We don't wrap the email addresses in quotes, nor the other special metadata
369 if(special) {
370 text.append (value_str);
371 }
372 else {
373 text.append (StaticStrings.SPEECH_CHARACTER);
374 text.append (value_str);
375 text.append (StaticStrings.SPEECH_CHARACTER);
376 }
377 value_str = null;
378 return text.toString ();
379 }
380
381 static private String searchtypeToString (Element command_element) {
382 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
383 StringBuffer text = new StringBuffer (StaticStrings.SEARCHTYPE_STR);
384 text.append (StaticStrings.TAB_CHARACTER);
385 NodeList search_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
386 int search_elements_length = search_elements.getLength ();
387 for(int i = 0; i < search_elements_length; i++) {
388 Element search_element = (Element) search_elements.item (i);
389 text.append (search_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
390 text.append (StaticStrings.SPACE_CHARACTER);
391 }
392 return text.substring (0, text.length () - 1);
393 }
394 else {
395 return null;
396 }
397 }
398
399 static private String subcollectionToString (Element command_element) {
400 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_STR);
401 text.append (StaticStrings.SPACE_CHARACTER);
402 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
403 text.append (StaticStrings.SPACE_CHARACTER);
404 text.append (StaticStrings.TAB_CHARACTER);
405 text.append (StaticStrings.SPEECH_CHARACTER);
406 if(command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE).equals (StaticStrings.EXCLUDE_STR)) {
407 text.append (StaticStrings.EXCLAMATION_CHARACTER);
408 }
409 text.append (command_element.getAttribute (StaticStrings.CONTENT_ATTRIBUTE));
410 text.append (StaticStrings.SEPARATOR_CHARACTER);
411 text.append (XMLTools.getValue (command_element));
412 text.append (StaticStrings.SEPARATOR_CHARACTER);
413 String options_str = command_element.getAttribute (StaticStrings.OPTIONS_ATTRIBUTE);
414 if(options_str.length () > 0) {
415 text.append (options_str);
416 }
417 options_str = null;
418 text.append (StaticStrings.SPEECH_CHARACTER);
419 return text.toString ();
420 }
421
422 static private String subcollectionDefaultIndexToString (Element command_element) {
423 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_STR);
424 text.append (StaticStrings.TAB_CHARACTER);
425 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
426 int content_elements_length = content_elements.getLength ();
427 for(int j = 0; j < content_elements_length; j++) {
428 Element content_element = (Element) content_elements.item (j);
429 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
430 if(j < content_elements_length - 1) {
431 text.append (StaticStrings.COMMA_CHARACTER);
432 }
433 }
434 return text.toString ();
435 }
436
437 static private String subcollectionIndexesToString (Element command_element) {
438 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_INDEX_STR);
439 text.append (StaticStrings.TAB_CHARACTER);
440 // Retrieve all of the subcollection index partitions
441 NodeList subcollectionindex_elements = command_element.getElementsByTagName (StaticStrings.INDEX_ELEMENT);
442 int subcollectionindex_elements_length = subcollectionindex_elements.getLength ();
443 if(subcollectionindex_elements_length == 0) {
444 return null;
445 }
446 for(int j = 0; j < subcollectionindex_elements_length; j++) {
447 Element subcollectionindex_element = (Element) subcollectionindex_elements.item (j);
448 NodeList content_elements = subcollectionindex_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
449 int content_elements_length = content_elements.getLength ();
450 for(int k = 0; k < content_elements_length; k++) {
451 Element content_element = (Element) content_elements.item (k);
452 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
453 if(k < content_elements_length - 1) {
454 text.append (StaticStrings.COMMA_CHARACTER);
455 }
456 }
457 if(j < subcollectionindex_elements_length - 1) {
458 text.append (StaticStrings.SPACE_CHARACTER);
459 }
460 }
461 return text.toString ();
462 }
463
464 static private String supercollectionToString (Element command_element) {
465 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.COLLECTION_ELEMENT);
466 int content_elements_length = content_elements.getLength ();
467 if(content_elements_length > 1) {
468 StringBuffer text = new StringBuffer (StaticStrings.SUPERCOLLECTION_STR);
469 text.append (StaticStrings.TAB_CHARACTER);
470 for(int j = 0; j < content_elements_length; j++) {
471 Element content_element = (Element) content_elements.item (j);
472 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
473 if(j < content_elements_length - 1) {
474 text.append (StaticStrings.SPACE_CHARACTER);
475 }
476 }
477 return text.toString ();
478 }
479 return null;
480 }
481
482 static private String unknownToString (Element command_element) {
483 return XMLTools.getValue (command_element);
484 }
485
486
487 /** Parse a collect.cfg into a DOM model representation.
488 * note we are ignoring 2.39 compatibility now. */
489 static public String parse (File collect_cfg_file, Document document) {
490 // hack for pre 2.71 compatibility - we need to add in a
491 // build type if there is not one there
492 boolean search_types_parsed = false;
493 boolean build_types_parsed = false;
494 try {
495 StringBuffer saved_collect_cfg_string_buffer = new StringBuffer ();
496
497 Element collect_cfg_element = document.getDocumentElement ();
498 // Read in the file one command at a time.
499 InputStream istream = new FileInputStream (collect_cfg_file);
500 Reader in_reader = new InputStreamReader (istream, CollectionConfiguration.ENCODING);
501 BufferedReader in = new BufferedReader (in_reader);
502 String command_str = null;
503 while((command_str = in.readLine ()) != null) {
504 saved_collect_cfg_string_buffer.append (command_str + "\n");
505
506 boolean append_element = true;
507 Element command_element = null;
508 // A command may be broken over several lines.
509 command_str = command_str.trim ();
510 boolean eof = false;
511 while(!eof && command_str.endsWith (StaticStrings.NEWLINE_CHARACTER)) {
512 String next_line = in.readLine ();
513 if(next_line != null) {
514 next_line = next_line.trim ();
515 if(next_line.length () > 0) {
516 // Remove the new line character
517 command_str = command_str.substring (0, command_str.lastIndexOf (StaticStrings.NEWLINE_CHARACTER));
518 // And append the next line, which due to the test above must be non-zero length
519 command_str = command_str + next_line;
520 }
521 next_line = null;
522 }
523 // If we've reached the end of the file theres nothing more we can do
524 else {
525 eof = true;
526 }
527 }
528 // If there is still a new line character, then we remove it and hope for the best
529 if(command_str.endsWith (StaticStrings.NEWLINE_CHARACTER)) {
530 command_str = command_str.substring (0, command_str.lastIndexOf (StaticStrings.NEWLINE_CHARACTER));
531 }
532 // Now we've either got a command to parse...
533 if(command_str.length () != 0) {
534 // Start trying to figure out what it is
535 //StringTokenizer tokenizer = new StringTokenizer(command_str);
536 // Instead of a standard string tokenizer I'm going to use the new version of CommandTokenizer, which is not only smart enough to correctly notice speech marks and correctly parse them out, but now also takes the input stream so it can rebuild tokens that stretch over several lines.
537 CommandTokenizer tokenizer = new CommandTokenizer (command_str, in);
538 String command_type = tokenizer.nextToken ().toLowerCase ();
539 // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
540 if(command_element == null && command_type.equals (StaticStrings.CLASSIFY_STR)) {
541 command_element = parseClassify (command_str, document);
542 }
543 if(command_element == null && command_type.equals (StaticStrings.FORMAT_STR)) {
544 command_element = parseFormat (tokenizer, document); // Revised to handle multiple lines
545 }
546 if(command_element == null && (command_type.equals (StaticStrings.INDEX_STR) || command_type.equals (StaticStrings.COMMENTED_INDEXES_STR))) {
547 command_element = parseIndex (command_str, document);
548 }
549 if(command_element == null && (command_type.equals (StaticStrings.INDEX_DEFAULT_STR) || command_type.equals (StaticStrings.COMMENTED_INDEX_DEFAULT_STR))) {
550
551 command_element = parseIndexDefault (command_str, document);
552 }
553 if(command_element == null && command_type.equals (StaticStrings.LANGUAGES_STR)) {
554 command_element = parseLanguage (command_str, document);
555 }
556 if(command_element == null && command_type.equals (StaticStrings.LANGUAGE_DEFAULT_STR)) {
557 command_element = parseLanguageDefault (command_str, document);
558 }
559 if (command_element == null && command_type.equals (StaticStrings.LANGUAGE_METADATA_STR)) {
560 command_element = parseLanguageMetadata (command_str, document);
561 }
562 if(command_element == null && command_type.equals (StaticStrings.LEVELS_STR)) {
563 command_element = parseIndexOptions (command_str, document, StaticStrings.LEVELS_STR, true);
564 }
565 if (command_element == null && command_type.equals (StaticStrings.COMMENTED_LEVELS_STR)) {
566 command_element = parseIndexOptions (command_str, document, StaticStrings.LEVELS_STR, false);
567 }
568 if(command_element == null && command_type.equals (StaticStrings.LEVEL_DEFAULT_STR)) {
569 command_element = parseIndexOptionDefault (command_str, document, StaticStrings.LEVEL_DEFAULT_STR, true);
570 }
571 if(command_element == null && command_type.equals (StaticStrings.COMMENTED_LEVEL_DEFAULT_STR)) {
572 command_element = parseIndexOptionDefault (command_str, document, StaticStrings.LEVEL_DEFAULT_STR, false);
573 }
574 if (command_element == null && command_type.equals (StaticStrings.INDEXOPTIONS_STR)) {
575 command_element = parseIndexOptions (command_str, document, StaticStrings.INDEXOPTIONS_STR, true);
576 }
577 if (command_element == null && command_type.equals (StaticStrings.COMMENTED_INDEXOPTIONS_STR)) {
578 command_element = parseIndexOptions (command_str, document, StaticStrings.INDEXOPTIONS_STR, false);
579 }
580 if(command_element == null && command_type.equals (StaticStrings.COLLECTIONMETADATA_STR)) {
581 command_element = parseMetadata (tokenizer, document); // Revised to handle multiple lines
582 }
583 if(command_element == null && (command_type.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR) || command_type.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR) || command_type.equals (StaticStrings.BUILDTYPE_STR) || command_type.equals (StaticStrings.DATABASETYPE_STR))) {
584 command_element = parseMetadataSpecial (command_str, document);
585 // pre 2.71 hack
586 if (command_type.equals (StaticStrings.BUILDTYPE_STR)) {
587 build_types_parsed = true;
588 }
589 }
590 if(command_element == null && command_type.equals (StaticStrings.PLUGIN_STR)) {
591 command_element = parsePlugin (command_str, document);
592 }
593 // leave here for backwards compatibility
594 if(command_element == null && command_type.equals (StaticStrings.SEARCHTYPE_STR)) {
595 command_element = parseSearchType (command_str, document);
596 // pre 2.71 hack
597 search_types_parsed = true;
598
599 }
600 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_STR)) {
601 command_element = parseSubCollection (command_str, document);
602 }
603 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_STR)) {
604 command_element = parseSubCollectionDefaultIndex (command_str, document);
605 }
606 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_INDEX_STR)) {
607 command_element = parseSubCollectionIndex (command_str, document);
608 }
609 if(command_element == null && (command_type.equals (StaticStrings.SUPERCOLLECTION_STR) || command_type.equals (StaticStrings.CCS_STR))) {
610 command_element = parseSuperCollection (command_str, document);
611 }
612 // Doesn't match any known type
613 command_type = null;
614 if(command_element == null) {
615 // No-one knows what to do with this command, so we create an Unknown command element
616 command_element = document.createElement (StaticStrings.UNKNOWN_ELEMENT);
617 XMLTools.setValue (command_element, command_str);
618 }
619 }
620 // Or an empty line to remember for later
621 else {
622 command_element = document.createElement (CollectionConfiguration.NEWLINE_ELEMENT);
623 }
624 // Now command element shouldn't be null so we append it to the collection config DOM, but only if we haven't been told not to add it
625 //if(append_element) {
626 collect_cfg_element.appendChild (command_element);
627 //}
628 }
629 if (!build_types_parsed) {
630 String buildtype_type = BuildTypeManager.BUILD_TYPE_MG;
631 if (search_types_parsed) {
632 buildtype_type = BuildTypeManager.BUILD_TYPE_MGPP;
633 }
634 Element command_element = parseMetadataSpecial (StaticStrings.BUILDTYPE_STR+" "+buildtype_type, document);
635 Node target_node = CollectionConfiguration.findInsertionPoint (command_element);
636 if(target_node != null) {
637 collect_cfg_element.insertBefore (command_element, target_node);
638 }
639 else {
640 collect_cfg_element.appendChild (command_element);
641 }
642
643 }
644 return saved_collect_cfg_string_buffer.toString();
645 }
646 catch(Exception exception) {
647 DebugStream.println ("Error in CollectionConfiguration.parse(java.io.File): " + exception);
648 DebugStream.printStackTrace (exception);
649 }
650
651 return null;
652 }
653
654
655 /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix.
656 * @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string
657 * @return a HashMap containing the arguments parsed
658 */
659 static public HashMap parseArguments (CommandTokenizer tokenizer) {
660 HashMap arguments = new HashMap ();
661 String name = null;
662 String value = null;
663 while(tokenizer.hasMoreTokens () || name != null) {
664 // First we retrieve a name if we need one.
665 if(name == null) {
666 name = tokenizer.nextToken ();
667 }
668 // Now we attempt to retrieve a value
669 if(tokenizer.hasMoreTokens ()) {
670 value = tokenizer.nextToken ();
671 // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop.
672 // The value is not a name if it contains a space character: it's a quoted value
673 if (value.startsWith(StaticStrings.MINUS_CHARACTER) && value.indexOf(StaticStrings.SPACE_CHARACTER) == -1) {
674 arguments.put (name, null);
675 name = value;
676 }
677 // Otherwise we have a typical name->value pair ready to go
678 else {
679 arguments.put (name, value);
680 name = null;
681 }
682 }
683 // Otherwise its a binary flag
684 else {
685 arguments.put (name, null);
686 name = null;
687 }
688 }
689 return arguments;
690 }
691
692 static private Element parseClassify (String command_str, Document document) {
693 Element command_element = null;
694 try {
695 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
696 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
697 if(tokenizer.countTokens () >= 2) { // Must support "classify Phind" (no args)
698 command_element = document.createElement (StaticStrings.CLASSIFY_ELEMENT);
699 // First token is classify
700 tokenizer.nextToken ();
701 // The next token is the classifier type
702 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, tokenizer.nextToken ());
703 // Now we parse out the remaining arguments into a hashmapping from name to value
704 HashMap arguments = parseArguments (tokenizer);
705 // Assign the arguments as Option elements
706 Iterator names = arguments.keySet ().iterator ();
707 while(names.hasNext ()) {
708 String name = (String) names.next ();
709 String value = (String) arguments.get (name); // Can be null
710 Element option_element = document.createElement (StaticStrings.OPTION_ELEMENT);
711 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name.substring (1));
712 if(value != null) {
713 // Remove any speech marks appended in strings containing whitespace
714 if(value.startsWith (StaticStrings.SPEECH_CHARACTER) && value.endsWith (StaticStrings.SPEECH_CHARACTER)) {
715 value = value.substring (1, value.length () - 1);
716 }
717 XMLTools.setValue (option_element, value);
718 }
719 option_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
720 command_element.appendChild (option_element);
721 option_element = null;
722 name = null;
723 value = null;
724 }
725 names = null;
726 arguments = null;
727 }
728 tokenizer = null;
729 }
730 catch(Exception error) {
731 }
732 return command_element;
733 }
734
735 static private Element parseFormat (CommandTokenizer tokenizer, Document document) {
736 Element command_element = null;
737 try {
738 command_element = document.createElement (StaticStrings.FORMAT_ELEMENT);
739 String name_str = tokenizer.nextToken ();
740 String value_str = tokenizer.nextToken ();
741 if(name_str != null && value_str != null) {
742 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
743 // If the value is true or false we add it as an attribute
744 if(value_str.equalsIgnoreCase (StaticStrings.TRUE_STR) || value_str.equalsIgnoreCase (StaticStrings.FALSE_STR)) {
745 command_element.setAttribute (StaticStrings.VALUE_ATTRIBUTE, value_str.toLowerCase ());
746 }
747 // Otherwise it gets added as a text node
748 else {
749 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
750 value_str = Codec.transform (value_str, Codec.GREENSTONE_TO_DOM);
751 XMLTools.setValue (command_element, value_str);
752 }
753 }
754 else {
755 command_element = null;
756 }
757 name_str = null;
758 value_str = null;
759 }
760 catch (Exception exception) {
761 DebugStream.printStackTrace (exception);
762 command_element = null;
763 }
764 return command_element;
765 }
766
767 static private Element parseIndex (String command_str, Document document) {
768 Element command_element = null;
769 try {
770 StringTokenizer tokenizer = new StringTokenizer (command_str);
771 String command = tokenizer.nextToken ();
772 command_element = document.createElement (StaticStrings.INDEXES_ELEMENT);
773 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (command.equals (StaticStrings.INDEX_STR) ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
774 command = null;
775 if(!tokenizer.hasMoreTokens ()) {
776
777 // there are no indexes
778 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.FALSE_STR);
779 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.FALSE_STR); // for now
780 tokenizer = null;
781 return command_element;
782 }
783
784 while(tokenizer.hasMoreTokens ()) {
785 Element index_element = document.createElement (StaticStrings.INDEX_ELEMENT);
786 String index_str = tokenizer.nextToken ();
787 // There are two types of index we have to consider. MG versions use "level:source,source" while MGPP versions use "source,source source"
788 if(index_str.indexOf (StaticStrings.COLON_CHARACTER) != -1) {
789 index_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE, index_str.substring (0, index_str.indexOf (StaticStrings.COLON_CHARACTER)));
790 index_str = index_str.substring (index_str.indexOf (StaticStrings.COLON_CHARACTER) + 1);
791 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.FALSE_STR);
792 }
793 else {
794 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.TRUE_STR);
795 }
796 StringTokenizer content_tokenizer = new StringTokenizer (index_str, StaticStrings.COMMA_CHARACTER);
797 while(content_tokenizer.hasMoreTokens ()) {
798 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
799 String content_str = content_tokenizer.nextToken ();
800 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
801 if(content_str.indexOf (StaticStrings.NS_SEP) == -1) {
802 if(content_str.equals (StaticStrings.TEXT_STR) || content_str.equals (StaticStrings.ALLFIELDS_STR) || content_str.equals(StaticStrings.METADATA_STR)) {
803 // Our special strings are OK.
804 }
805 else {
806 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
807 }
808 }
809 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_str);
810 index_element.appendChild (content_element);
811 content_element = null;
812 }
813 content_tokenizer = null;
814 index_str = null;
815 command_element.appendChild (index_element);
816 index_element = null;
817 }
818 tokenizer = null;
819 }
820 catch (Exception exception) {
821 exception.printStackTrace ();
822 }
823 return command_element;
824 }
825
826 static private Element parseIndexDefault (String command_str, Document document) {
827 Element command_element = null;
828 try {
829 StringTokenizer tokenizer = new StringTokenizer (command_str);
830 if(tokenizer.countTokens () >= 2) {
831 command_element = document.createElement (StaticStrings.INDEX_DEFAULT_ELEMENT);
832 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken ()).equals (StaticStrings.INDEX_DEFAULT_STR) ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
833 String index_str = tokenizer.nextToken ();
834 String level="";
835 if (index_str.indexOf (StaticStrings.COLON_CHARACTER) !=-1) {
836 level = index_str.substring (0, index_str.indexOf (StaticStrings.COLON_CHARACTER));
837 }
838
839 command_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE,level);
840
841 String content_str = index_str;
842
843 if (index_str.indexOf (StaticStrings.COLON_CHARACTER) !=-1) {
844 content_str = index_str.substring (index_str.indexOf (StaticStrings.COLON_CHARACTER) + 1);
845 }
846
847 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
848 while(content_tokenizer.hasMoreTokens ()) {
849 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
850 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
851 command_element.appendChild (content_element);
852 content_element = null;
853 }
854 content_tokenizer = null;
855 content_str = null;
856 content_str = null;
857 index_str = null;
858 }
859 tokenizer = null;
860 }
861 catch (Exception exception) {
862 }
863 return command_element;
864 }
865
866 static private Element parseLanguage (String command_str, Document document) {
867 Element command_element = null;
868 try {
869 StringTokenizer tokenizer = new StringTokenizer (command_str);
870 tokenizer.nextToken ();
871 if(tokenizer.hasMoreTokens ()) {
872 command_element = document.createElement (StaticStrings.LANGUAGES_ELEMENT);
873 while(tokenizer.hasMoreTokens ()) {
874 Element language_element = document.createElement (StaticStrings.LANGUAGE_ELEMENT);
875 language_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
876 command_element.appendChild (language_element);
877 language_element = null;
878 }
879 }
880 tokenizer = null;
881 }
882 catch (Exception exception) {
883 }
884 return command_element;
885 }
886
887 static private Element parseLanguageDefault (String command_str, Document document) {
888 Element command_element = null;
889 try {
890 StringTokenizer tokenizer = new StringTokenizer (command_str);
891 if(tokenizer.countTokens () >= 2) {
892 command_element = document.createElement (StaticStrings.LANGUAGE_DEFAULT_ELEMENT);
893 tokenizer.nextToken ();
894 String default_language_str = tokenizer.nextToken ();
895 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, default_language_str);
896 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
897 default_language_str = null;
898 }
899 tokenizer = null;
900 }
901 catch (Exception exception) {
902 }
903 return command_element;
904 }
905
906 static private Element parseLanguageMetadata (String command_str, Document document) {
907 Element command_element = null;
908 try {
909 StringTokenizer tokenizer = new StringTokenizer (command_str);
910 if(tokenizer.countTokens () >= 2) {
911 command_element = document.createElement (StaticStrings.LANGUAGE_METADATA_ELEMENT);
912 tokenizer.nextToken ();
913 String language_metadata_str = tokenizer.nextToken ();
914 if (language_metadata_str.indexOf (StaticStrings.NS_SEP) == -1) {
915 language_metadata_str = StaticStrings.EXTRACTED_NAMESPACE + language_metadata_str;
916 }
917 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, language_metadata_str);
918 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
919 language_metadata_str = null;
920 }
921 tokenizer = null;
922
923 }
924 catch (Exception exception) {
925 }
926 return command_element;
927 }
928
929 static private Element parseIndexOptions (String command_str, Document document, String type, boolean assigned) {
930 Element command_element = null;
931 try {
932 StringTokenizer tokenizer = new StringTokenizer (command_str);
933 // First token is command type
934 String command = tokenizer.nextToken ();
935 if(tokenizer.hasMoreTokens ()) {
936 command_element = document.createElement (StaticStrings.INDEXOPTIONS_ELEMENT);
937 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE,type);
938 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (assigned ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
939 while(tokenizer.hasMoreTokens ()) {
940 Element option_element = document.createElement (StaticStrings.INDEXOPTION_ELEMENT);
941 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
942 command_element.appendChild (option_element);
943 option_element = null;
944 }
945 }
946 command = null;
947 }
948 catch(Exception exception) {
949 }
950 return command_element;
951 }
952
953 static private Element parseIndexOptionDefault (String command_str, Document document, String type, boolean assigned) {
954 Element command_element = null;
955 try {
956 StringTokenizer tokenizer = new StringTokenizer (command_str);
957 // First token is command type
958 String command = tokenizer.nextToken ();
959 if(tokenizer.hasMoreTokens ()) {
960 command_element = document.createElement (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT);
961 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (assigned ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR)); // is it commented out or not?
962 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, type);
963 command_element.setAttribute (StaticStrings.VALUE_ATTRIBUTE, tokenizer.nextToken ());
964 }
965
966 tokenizer = null;
967 }
968 catch (Exception exception) {
969 }
970 return command_element;
971 }
972
973 static private Element parseMetadata (CommandTokenizer tokenizer, Document document) {
974 Element command_element = null;
975 try {
976 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_ELEMENT);
977 String name_str = tokenizer.nextToken ();
978 String value_str = tokenizer.nextToken ();
979 if(name_str != null && value_str != null) {
980 String language_str = Configuration.getLanguage ();
981 // Check if the value string is actually a language string
982 if(value_str.startsWith (StaticStrings.LBRACKET_CHARACTER) && value_str.endsWith (StaticStrings.RBRACKET_CHARACTER)) {
983 language_str = value_str.substring (value_str.indexOf (StaticStrings.LANGUAGE_ARGUMENT) + 2, value_str.length () - 1);
984 value_str = tokenizer.nextToken ();
985 }
986 if(value_str != null) {
987 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
988 value_str = Codec.transform (value_str, Codec.GREENSTONE_TO_DOM);
989 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
990 command_element.setAttribute (StaticStrings.LANGUAGE_ATTRIBUTE, language_str);
991 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
992 XMLTools.setValue (command_element, value_str);
993 }
994 else {
995 command_element = null;
996 }
997 language_str = null;
998 }
999 else {
1000 command_element = null;
1001 }
1002 name_str = null;
1003 value_str = null;
1004 }
1005 catch (Exception exception) {
1006 DebugStream.printStackTrace (exception);
1007 command_element = null;
1008 }
1009 return command_element;
1010 }
1011
1012 static private Element parseMetadataSpecial (String command_str, Document document) {
1013 Element command_element = null;
1014 try {
1015 StringTokenizer tokenizer = new StringTokenizer (command_str);
1016 if(tokenizer.countTokens () >= 2) {
1017 String name_str = tokenizer.nextToken ();
1018 String value_str = tokenizer.nextToken ();
1019 if (name_str.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR)) {
1020 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT);
1021 }
1022 else if(name_str.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR)) {
1023 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT);
1024 }
1025 else if(name_str.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR)) {
1026 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT);
1027 }
1028 else if (name_str.equals (StaticStrings.BUILDTYPE_STR)) {
1029 command_element = document.createElement (StaticStrings.BUILDTYPE_ELEMENT);
1030 }
1031 else if (name_str.equals (StaticStrings.DATABASETYPE_STR)) {
1032 command_element = document.createElement (StaticStrings.DATABASETYPE_ELEMENT);
1033 }
1034 if(command_element != null) {
1035 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
1036 command_element.setAttribute (StaticStrings.LANGUAGE_ATTRIBUTE, StaticStrings.ENGLISH_LANGUAGE_STR);
1037 command_element.setAttribute (StaticStrings.SPECIAL_ATTRIBUTE, StaticStrings.TRUE_STR);
1038 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1039 if(value_str.startsWith (StaticStrings.SPEECH_CHARACTER) && value_str.endsWith (StaticStrings.SPEECH_CHARACTER)) {
1040 value_str = value_str.substring (1, value_str.length () - 1);
1041 }
1042 XMLTools.setValue (command_element, value_str);
1043 }
1044 value_str = null;
1045 name_str = null;
1046 }
1047 tokenizer = null;
1048 }
1049 catch (Exception exception) {
1050 }
1051 return command_element;
1052 }
1053
1054 static private Element parsePlugin (String command_str, Document document) {
1055 Element command_element = null;
1056 try {
1057 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
1058 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1059 if(tokenizer.countTokens () >= 2) {
1060 command_element = document.createElement (StaticStrings.PLUGIN_ELEMENT);
1061 // First token is plugin
1062 tokenizer.nextToken ();
1063 // The next token is the type
1064 String type = tokenizer.nextToken ();
1065 type = Utility.ensureNewPluginName(type);
1066 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, type);
1067 // Now we parse out the remaining arguments into a hashmapping from name to value
1068 HashMap arguments = parseArguments (tokenizer);
1069 // also watch out for the deprecated -use_metadata_files option to RecPlug and remove it
1070 Iterator names = arguments.keySet ().iterator ();
1071 while(names.hasNext ()) {
1072 String name = (String) names.next ();
1073 String value = (String) arguments.get (name); // Can be null
1074
1075 if(type.equals (StaticStrings.RECPLUG_STR) && name.substring (1).equals (StaticStrings.USE_METADATA_FILES_ARGUMENT)) {
1076 continue; // ignore this option
1077 }
1078 Element option_element = document.createElement (StaticStrings.OPTION_ELEMENT);
1079 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name.substring (1));
1080 option_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1081 if(value != null) {
1082 // Remove any speech marks appended in strings containing whitespace
1083 if(value.startsWith (StaticStrings.SPEECH_CHARACTER) && value.endsWith (StaticStrings.SPEECH_CHARACTER)) {
1084 value = value.substring (1, value.length () - 1);
1085 }
1086 XMLTools.setValue (option_element, value);
1087 }
1088 command_element.appendChild (option_element);
1089 option_element = null;
1090 name = null;
1091 value = null;
1092 }
1093
1094 type = null;
1095 names = null;
1096 arguments = null;
1097 }
1098 tokenizer = null;
1099 }
1100 catch(Exception exception) {
1101 // This catch clause had been left empty. If this is deliberate then
1102 // we should have a comment here explaining why there is no need to
1103 // print anything out. Am assuming this is mistake for now, and
1104 // have added in a call to printStackTrace()
1105 System.err.println("Malformed plugin statement");
1106 exception.printStackTrace();
1107 }
1108 return command_element;
1109 }
1110
1111 /* search types are now handled as formats - leave this here to convert in case we have an old config file */
1112 static private Element parseSearchType (String command_str, Document document) {
1113 Element command_element = null;
1114 try {
1115 StringTokenizer tokenizer = new StringTokenizer (command_str);
1116 // First token is command type (searchtype)
1117 tokenizer.nextToken ();
1118 if(tokenizer.hasMoreTokens ()) {
1119 command_element = document.createElement (StaticStrings.FORMAT_ELEMENT);
1120 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, "SearchTypes");
1121 String value = tokenizer.nextToken ();
1122 while(tokenizer.hasMoreTokens ()) {
1123 value += ","+tokenizer.nextToken ();
1124 }
1125 value = Codec.transform (value, Codec.GREENSTONE_TO_DOM);
1126 XMLTools.setValue (command_element, value);
1127 }
1128 }
1129 catch(Exception exception) {
1130 }
1131 return command_element;
1132 }
1133
1134 static private Element parseSubCollection (String command_str, Document document) {
1135 Element command_element = null;
1136 try {
1137 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
1138 if(tokenizer.countTokens () >= 3) {
1139 command_element = document.createElement (StaticStrings.SUBCOLLECTION_ELEMENT);
1140 // First token is command type
1141 tokenizer.nextToken ();
1142 // Then subcollection identifier
1143 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
1144 // Then finally the pattern used to build the subcollection partition
1145 String full_pattern_str = tokenizer.nextToken ();
1146 // Set inclusion/exclusion flag and remove any exclamation mark
1147 boolean exclusion = full_pattern_str.startsWith (StaticStrings.EXCLAMATION_CHARACTER);
1148 if (exclusion) {
1149 full_pattern_str = full_pattern_str.substring (1, full_pattern_str.length ());
1150 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, StaticStrings.EXCLUDE_STR);
1151 }
1152 else {
1153 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, StaticStrings.INCLUDE_STR);
1154 }
1155
1156 // Let's make sure it is a valid Greenstone configuration line
1157 String[] results = full_pattern_str.split("\\" + StaticStrings.SEPARATOR_CHARACTER, 3);
1158
1159 if (results.length >= 2) {
1160 String content_str = results[0];
1161 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1162 if (!content_str.equals (StaticStrings.FILENAME_STR) && content_str.indexOf (StaticStrings.NS_SEP) == -1) {
1163 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
1164 }
1165 command_element.setAttribute (StaticStrings.CONTENT_ATTRIBUTE, content_str);
1166 XMLTools.setValue (command_element, results[1]);
1167 if (results.length >= 3) {
1168 command_element.setAttribute (StaticStrings.OPTIONS_ATTRIBUTE, results[2]);
1169 }
1170 }
1171 }
1172 }
1173 catch(Exception exception) {
1174 exception.printStackTrace ();
1175 }
1176 return command_element;
1177 }
1178
1179 static private Element parseSubCollectionDefaultIndex (String command_str, Document document) {
1180 Element command_element = null;
1181 try {
1182 StringTokenizer tokenizer = new StringTokenizer (command_str);
1183 if(tokenizer.countTokens () == 2) {
1184 command_element = document.createElement (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT);
1185 tokenizer.nextToken ();
1186 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1187 String content_str = tokenizer.nextToken ();
1188 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
1189 while(content_tokenizer.hasMoreTokens ()) {
1190 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
1191 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
1192 command_element.appendChild (content_element);
1193 content_element = null;
1194 }
1195 content_tokenizer = null;
1196 content_str = null;
1197 }
1198 tokenizer = null;
1199 }
1200 catch(Exception exception) {
1201 }
1202 return command_element;
1203 }
1204
1205 static private Element parseSubCollectionIndex (String command_str, Document document) {
1206 Element command_element = null;
1207 try {
1208 StringTokenizer tokenizer = new StringTokenizer (command_str);
1209 tokenizer.nextToken ();
1210 if(tokenizer.hasMoreTokens ()) {
1211 command_element = document.createElement (StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT);
1212 }
1213 while(tokenizer.hasMoreTokens ()) {
1214 Element subcollectionindex_element = document.createElement (StaticStrings.INDEX_ELEMENT);
1215 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1216 String content_str = tokenizer.nextToken ();
1217 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
1218 while(content_tokenizer.hasMoreTokens ()) {
1219 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
1220 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
1221 subcollectionindex_element.appendChild (content_element);
1222 content_element = null;
1223 }
1224 content_tokenizer = null;
1225 content_str = null;
1226 command_element.appendChild (subcollectionindex_element);
1227 subcollectionindex_element = null;
1228 }
1229 tokenizer = null;
1230 }
1231 catch (Exception exception) {
1232 }
1233 return command_element;
1234 }
1235
1236 static private Element parseSuperCollection (String command_str, Document document) {
1237 Element command_element = null;
1238 try {
1239 StringTokenizer tokenizer = new StringTokenizer (command_str);
1240 if(tokenizer.countTokens () >= 3) {
1241 command_element = document.createElement (StaticStrings.SUPERCOLLECTION_ELEMENT);
1242 tokenizer.nextToken ();
1243 while(tokenizer.hasMoreTokens ()) {
1244 Element collection_element = document.createElement (StaticStrings.COLLECTION_ELEMENT);
1245 collection_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
1246 command_element.appendChild (collection_element);
1247 collection_element = null;
1248 }
1249 }
1250 tokenizer = null;
1251 }
1252 catch(Exception exception) {
1253 }
1254 return command_element;
1255 }
1256
1257 static private String pluginToString (Element command_element) {
1258 if(command_element.getAttribute (StaticStrings.SEPARATOR_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
1259 return "";
1260 }
1261 StringBuffer text = new StringBuffer (StaticStrings.PLUGIN_STR);
1262 text.append (StaticStrings.TAB_CHARACTER);
1263 text.append (command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE));
1264 // Retrieve, and output, the arguments
1265 NodeList option_elements = command_element.getElementsByTagName (StaticStrings.OPTION_ELEMENT);
1266 int option_elements_length = option_elements.getLength ();
1267 if(option_elements_length > 0) {
1268 for(int j = 0; j < option_elements_length; j++) {
1269 Element option_element = (Element) option_elements.item (j);
1270 if(option_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
1271 text.append (StaticStrings.SPACE_CHARACTER);
1272 text.append (StaticStrings.MINUS_CHARACTER);
1273 text.append (option_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
1274 String value_str = XMLTools.getValue (option_element);
1275 if (value_str.length () > 0) {
1276 text.append (StaticStrings.SPACE_CHARACTER);
1277 if(value_str.indexOf (StaticStrings.SPACE_CHARACTER) != -1) {
1278 // enclose in quotes
1279 text.append(StaticStrings.SPEECH_CHARACTER);
1280 text.append(value_str);
1281 text.append(StaticStrings.SPEECH_CHARACTER);
1282 } else {
1283
1284 text.append(value_str);
1285 }
1286 }
1287
1288 value_str = null;
1289 }
1290 option_element = null;
1291 }
1292 }
1293 option_elements = null;
1294
1295 return text.toString ();
1296 }
1297
1298 static public String generateStringVersion(Document document) {
1299
1300 StringBuffer collect_cfg_string_buffer = new StringBuffer ();
1301 NodeList command_elements = document.getDocumentElement ().getChildNodes ();
1302 boolean just_wrote_blank_line = false; // Prevent two or more blank lines in a row
1303 for (int i = 0; i < command_elements.getLength (); i++) {
1304 Node command_node = command_elements.item (i);
1305 if (!(command_node instanceof Element)) {
1306 // We're only interested in Elements
1307 continue;
1308 }
1309 Element command_element = (Element) command_node;
1310
1311 // Handle NewLine elements (blank lines)
1312 if (command_element.getNodeName ().equals (CollectionConfiguration.NEWLINE_ELEMENT) && !just_wrote_blank_line) {
1313 collect_cfg_string_buffer.append ("\n");
1314 just_wrote_blank_line = true;
1315 }
1316
1317 // Anything else we write to file, but only if it has been assigned, except for index and level commands
1318 // (which just get commented out if unassigned -- a side effect of MG & MGPP compatibility)
1319 else if (!command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR) || command_element.getNodeName ().equals (StaticStrings.INDEXES_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEX_DEFAULT_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEXOPTIONS_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT)) {
1320 String command = toString(command_element);
1321
1322 if (command != null && command.length ()> 0 ) {
1323 collect_cfg_string_buffer.append (command + "\n");
1324 just_wrote_blank_line = false;
1325 }
1326 }
1327 }
1328
1329 return collect_cfg_string_buffer.toString ();
1330 }
1331
1332
1333}
Note: See TracBrowser for help on using the repository browser.