source: main/trunk/gli/src/org/greenstone/gatherer/cdm/CollectCfgReadWrite.java@ 36244

Last change on this file since 36244 was 36244, checked in by kjdon, 23 months ago

updating this for new searchmeta changes

File size: 65.1 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * Methods to read collect.cfg files into internal XML form, and write
9 * them back out again.
10 *
11 * Copyright (C) 1999 New Zealand Digital Library Project
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 *########################################################################
27 */
28package org.greenstone.gatherer.cdm;
29
30import java.io.BufferedReader;
31import java.io.File;
32import java.io.FileInputStream;
33import java.io.InputStream;
34import java.io.InputStreamReader;
35import java.io.Reader;
36
37import java.util.HashMap;
38import java.util.Iterator;
39import java.util.StringTokenizer;
40
41import org.greenstone.gatherer.DebugStream;
42import org.greenstone.gatherer.Configuration;
43import org.greenstone.gatherer.metadata.MetadataElement;
44import org.greenstone.gatherer.metadata.MetadataTools;
45import org.greenstone.gatherer.util.Codec;
46import org.greenstone.gatherer.util.XMLTools;
47import org.greenstone.gatherer.util.StaticStrings;
48import org.greenstone.gatherer.util.Utility;
49
50import org.w3c.dom.*;
51
52public class CollectCfgReadWrite {
53
54
55 static public String toString (Element command_element) {
56 String command_element_name = command_element.getNodeName ();
57 if(command_element_name.equals (StaticStrings.CLASSIFY_ELEMENT)) {
58 return classifyToString (command_element);
59 }
60 else if(command_element_name.equals (StaticStrings.FORMAT_ELEMENT)) {
61 return formatToString (command_element);
62 }
63 else if(command_element_name.equals (StaticStrings.INDEXES_ELEMENT)) {
64 return indexesToString (command_element);
65 }
66 else if(command_element_name.equals (StaticStrings.INDEX_DEFAULT_ELEMENT)) {
67 return indexDefaultToString (command_element);
68 }
69 else if(command_element_name.equals (StaticStrings.SORTS_ELEMENT)) {
70 return sortsToString (command_element);
71 }
72 else if(command_element_name.equals (StaticStrings.SORT_DEFAULT_ELEMENT)) {
73 return sortDefaultToString (command_element);
74 }
75 else if(command_element_name.equals (StaticStrings.LANGUAGES_ELEMENT)) {
76 return languagesToString (command_element);
77 }
78 else if(command_element_name.equals (StaticStrings.LANGUAGE_DEFAULT_ELEMENT)) {
79 return languageDefaultToString (command_element);
80 }
81 else if (command_element_name.equals (StaticStrings.LANGUAGE_METADATA_ELEMENT)) {
82 return languageMetadataToString (command_element);
83 }
84 else if(command_element_name.equals (StaticStrings.INDEXOPTIONS_ELEMENT)) {
85 return indexOptionsToString (command_element);
86 }
87 else if(command_element_name.equals (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT)) {
88 return indexOptionDefaultToString (command_element);
89 }
90 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_ELEMENT)) {
91 return metadataToString (command_element);
92 }
93 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT)) {
94 return metadataToString (command_element);
95 }
96 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT)) {
97 return metadataToString (command_element);
98 }
99 else if(command_element_name.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT)) {
100 return metadataToString (command_element);
101 }
102 else if (command_element_name.equals (StaticStrings.SEARCHMETADATA_ELEMENT)) {
103 return metadataToString( command_element, true);
104 }
105 else if (command_element_name.equals (StaticStrings.BUILDTYPE_ELEMENT)) {
106 return metadataToString (command_element);
107 }
108 else if (command_element_name.equals (StaticStrings.DATABASETYPE_ELEMENT)) {
109 return metadataToString (command_element);
110 }
111 else if(command_element_name.equals (StaticStrings.PLUGIN_ELEMENT)) {
112 return pluginToString (command_element);
113 }
114 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_ELEMENT)) {
115 return subcollectionToString (command_element);
116 }
117 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) {
118 return subcollectionDefaultIndexToString (command_element);
119 }
120 else if(command_element_name.equals (StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT)) {
121 return subcollectionIndexesToString (command_element);
122 }
123 else if(command_element_name.equals (StaticStrings.SUPERCOLLECTION_ELEMENT)) {
124 return supercollectionToString (command_element);
125 }
126 else if(command_element_name.equals (StaticStrings.UNKNOWN_ELEMENT)) {
127 return unknownToString (command_element);
128 }
129 return "";
130 }
131
132 static private String classifyToString (Element command_element) {
133 StringBuffer text = new StringBuffer (StaticStrings.CLASSIFY_STR);
134 text.append (StaticStrings.TAB_CHARACTER);
135 text.append (command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE));
136 NodeList option_elements = command_element.getElementsByTagName (StaticStrings.OPTION_ELEMENT);
137 int option_elements_length = option_elements.getLength ();
138 for(int j = 0; j < option_elements_length; j++) {
139 Element option_element = (Element) option_elements.item (j);
140 if(option_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
141 text.append (StaticStrings.SPACE_CHARACTER);
142 text.append (StaticStrings.MINUS_CHARACTER);
143 text.append (option_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
144 String value_str = XMLTools.getValue (option_element);
145
146 if (value_str.length () > 0) {
147 text.append (StaticStrings.SPACE_CHARACTER);
148 if(value_str.indexOf (StaticStrings.SPACE_CHARACTER) != -1) {
149 // enclose in quotes
150 text.append(StaticStrings.SPEECH_CHARACTER);
151 text.append(value_str);
152 text.append(StaticStrings.SPEECH_CHARACTER);
153 } else {
154
155 text.append(value_str);
156 }
157 }
158
159 value_str = null;
160 }
161 option_element = null;
162 }
163 option_elements = null;
164 return text.toString ();
165 }
166
167 static private String formatToString (Element command_element) {
168 StringBuffer text = new StringBuffer (StaticStrings.FORMAT_STR);
169 text.append (StaticStrings.SPACE_CHARACTER);
170 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
171 text.append (StaticStrings.SPACE_CHARACTER);
172 String value_str = command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE);
173 if(value_str.length () != 0) {
174 text.append (value_str);
175 }
176 else {
177 // Remember to encode format string to Greenstone specification
178 value_str = Codec.transform (XMLTools.getValue (command_element), Codec.DOM_TO_GREENSTONE);
179 text.append (StaticStrings.SPEECH_CHARACTER);
180 text.append (value_str);
181 text.append (StaticStrings.SPEECH_CHARACTER);
182 }
183 value_str = null;
184 return text.toString ();
185 }
186
187 static private String indexesToString (Element command_element) {
188 boolean comment_only = false;
189 StringBuffer text = new StringBuffer ("");
190 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
191 text.append ("#");
192 comment_only = true;
193 }
194 text.append (StaticStrings.INDEX_STR);
195 text.append (StaticStrings.TAB_CHARACTER);
196 if(!comment_only) {
197 text.append (StaticStrings.TAB_CHARACTER);
198 }
199 NodeList index_elements = command_element.getElementsByTagName (StaticStrings.INDEX_ELEMENT);
200 if (index_elements.getLength () == 0) { // no indexes
201 return "";
202 }
203 // For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
204 int index_elements_length = index_elements.getLength ();
205 for(int j = 0; j < index_elements_length; j++) {
206 Element index_element = (Element) index_elements.item (j);
207 String level_str = index_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE);
208 if(level_str.length () > 0) {
209 text.append (level_str);
210 text.append (StaticStrings.COLON_CHARACTER);
211 }
212 NodeList content_elements = index_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
213 int content_elements_length = content_elements.getLength ();
214 // Don't output anything if no indexes are set
215 if(content_elements_length == 0) {
216 return null;
217 }
218 for(int k = 0; k < content_elements_length; k++) {
219 Element content_element = (Element) content_elements.item (k);
220 String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
221 text.append (name_str);
222 name_str = null;
223 if(k < content_elements_length - 1) {
224 text.append (StaticStrings.COMMA_CHARACTER);
225 }
226 content_element = null;
227 }
228 if(j < index_elements_length - 1) {
229 text.append (StaticStrings.SPACE_CHARACTER);
230 }
231 content_elements = null;
232 index_element = null;
233 }
234 index_elements = null;
235 return text.toString ();
236 }
237
238 static private String indexDefaultToString (Element command_element) {
239 StringBuffer text = new StringBuffer ("");
240 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
241 text.append ("#");
242 }
243 text.append (StaticStrings.INDEX_DEFAULT_STR);
244 text.append (StaticStrings.TAB_CHARACTER);
245 if (!command_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE).equals ("")) {
246 text.append (command_element.getAttribute (StaticStrings.LEVEL_ATTRIBUTE));
247 text.append (StaticStrings.COLON_CHARACTER);
248 }
249 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
250 int content_elements_length = content_elements.getLength ();
251 for(int j = 0; j < content_elements_length; j++) {
252 Element content_element = (Element) content_elements.item (j);
253 String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
254 text.append (name_str);
255 name_str = null;
256 if(j < content_elements_length - 1) {
257 text.append (StaticStrings.COMMA_CHARACTER);
258 }
259 content_element = null;
260 }
261 content_elements = null;
262 return text.toString ();
263 }
264 static private String sortsToString (Element command_element) {
265 boolean comment_only = false;
266 StringBuffer text = new StringBuffer ("");
267 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
268 System.err.println("sorts not assigned, returning false");
269 return "";
270 // text.append ("#");
271 //comment_only = true;
272 }
273 text.append (StaticStrings.SORT_STR);
274 text.append (StaticStrings.TAB_CHARACTER);
275 // if(!comment_only) { why???
276 // text.append (StaticStrings.TAB_CHARACTER);
277 // }
278 NodeList index_elements = command_element.getElementsByTagName (StaticStrings.SORT_ELEMENT);
279 if (index_elements.getLength () == 0) { // no indexes
280 return "";
281 }
282 // For each sortfield, concatenate its child content elements into a single comma separated list
283 int index_elements_length = index_elements.getLength ();
284 for(int j = 0; j < index_elements_length; j++) {
285 Element index_element = (Element) index_elements.item (j);
286 NodeList content_elements = index_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
287 int content_elements_length = content_elements.getLength ();
288 // Don't output anything if no indexes are set
289 if(content_elements_length == 0) {
290 return null;
291 }
292 for(int k = 0; k < content_elements_length; k++) {
293 Element content_element = (Element) content_elements.item (k);
294 String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
295 text.append (name_str);
296 name_str = null;
297 if(k < content_elements_length - 1) {
298 text.append (StaticStrings.COMMA_CHARACTER);
299 }
300 content_element = null;
301 }
302 if(j < index_elements_length - 1) {
303 text.append (StaticStrings.SPACE_CHARACTER);
304 }
305 content_elements = null;
306 index_element = null;
307 }
308 index_elements = null;
309 return text.toString ();
310 }
311
312 static private String sortDefaultToString (Element command_element) {
313 StringBuffer text = new StringBuffer ("");
314 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
315 return "";
316 }
317 text.append (StaticStrings.SORT_DEFAULT_STR);
318 text.append (StaticStrings.TAB_CHARACTER);
319 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
320 int content_elements_length = content_elements.getLength ();
321 for(int j = 0; j < content_elements_length; j++) {
322 Element content_element = (Element) content_elements.item (j);
323 String name_str = content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
324 text.append (name_str);
325 name_str = null;
326 if(j < content_elements_length - 1) {
327 text.append (StaticStrings.COMMA_CHARACTER);
328 }
329 content_element = null;
330 }
331 content_elements = null;
332 return text.toString ();
333 }
334
335 static private String languagesToString (Element command_element) {
336 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGES_STR);
337 text.append (StaticStrings.TAB_CHARACTER);
338 // Retrieve all the languages and write them out in a space separated list
339 NodeList language_elements = command_element.getElementsByTagName (StaticStrings.LANGUAGE_ELEMENT);
340 int language_elements_length = language_elements.getLength ();
341 if(language_elements_length == 0) {
342 return null;
343 }
344 for(int j = 0; j < language_elements_length; j++) {
345 Element language_element = (Element) language_elements.item (j);
346 text.append (language_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
347 if(j < language_elements_length - 1) {
348 text.append (StaticStrings.SPACE_CHARACTER);
349 }
350 }
351 return text.toString ();
352 }
353
354 static private String languageDefaultToString (Element command_element) {
355 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGE_DEFAULT_STR);
356 text.append (StaticStrings.TAB_CHARACTER);
357 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
358 return text.toString ();
359 }
360
361 static private String languageMetadataToString (Element command_element) {
362 if (!command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
363 return "";
364 }
365 StringBuffer text = new StringBuffer (StaticStrings.LANGUAGE_METADATA_STR);
366 text.append (StaticStrings.TAB_CHARACTER);
367 String name_str = command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
368 text.append (name_str);
369 return text.toString ();
370 }
371
372 static private String indexOptionsToString (Element command_element) {
373 StringBuffer text = new StringBuffer ("");
374 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
375 text.append ("#");
376 }
377 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
378 text.append (StaticStrings.TAB_CHARACTER);
379 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.INDEXOPTION_ELEMENT);
380 int content_elements_length = content_elements.getLength ();
381 // Don't output anything if no options are set.
382 if(content_elements_length == 0) {
383 return null;
384 }
385 for(int i = 0; i < content_elements_length; i++) {
386 Element content_element = (Element) content_elements.item (i);
387 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
388 text.append (StaticStrings.SPACE_CHARACTER);
389 }
390 return text.substring (0, text.length () - 1);
391 }
392
393 static private String indexOptionDefaultToString (Element command_element) {
394 // Don't bother if there is no value
395 if (command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE).equals ("")) {
396 return "";
397 }
398 StringBuffer text = new StringBuffer ("");
399 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR)) {
400 text.append ("#");
401 }
402 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
403 text.append (StaticStrings.TAB_CHARACTER);
404 text.append (command_element.getAttribute (StaticStrings.VALUE_ATTRIBUTE));
405 return text.toString ();
406 }
407
408 static private String metadataToString(Element command_element) {
409 return metadataToString(command_element, false);
410 }
411 static private String metadataToString (Element command_element, boolean use_dot) {
412 // lets first check the value - if its empty, don't bother sticking it in the config file
413 String value_str = XMLTools.getValue (command_element);
414 if (value_str.equals ("")) {
415 return "";
416 }
417 boolean special = false;
418
419 StringBuffer text = new StringBuffer ("");
420 String name_str = command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE);
421 // If the name is one of the special four, we don't write the collectionmeta first. Note maintainer and buildtype are singled out for 'prittying' reasons.
422 if(name_str.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR)|| name_str.equals (StaticStrings.BUILDTYPE_STR) || name_str.equals (StaticStrings.DATABASETYPE_STR)) {
423 text.append (name_str);
424 text.append (StaticStrings.TAB_CHARACTER);
425 special = true;
426 }
427 else if (name_str.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR) || name_str.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR) ) {
428 text.append (name_str);
429 text.append (StaticStrings.TAB_CHARACTER);
430 text.append (StaticStrings.TAB_CHARACTER);
431 special = true;
432 }
433 else {
434 text.append (StaticStrings.COLLECTIONMETADATA_STR);
435 text.append (StaticStrings.TAB_CHARACTER);
436 if (use_dot) {
437 text.append(StaticStrings.STOP_CHARACTER);
438 }
439 text.append (name_str);
440 text.append (StaticStrings.SPACE_CHARACTER);
441 String language_str = command_element.getAttribute (StaticStrings.LANGUAGE_ATTRIBUTE);
442 text.append (StaticStrings.LBRACKET_CHARACTER);
443 text.append (StaticStrings.LANGUAGE_ARGUMENT);
444 text.append (language_str);
445 text.append (StaticStrings.RBRACKET_CHARACTER);
446 text.append (StaticStrings.SPACE_CHARACTER);
447 }
448 name_str = null;
449
450 // decode the value from XML to a form for config file
451 value_str = Codec.transform (value_str, Codec.DOM_TO_GREENSTONE);
452
453 // We don't wrap the email addresses in quotes, nor the other special metadata
454 if(special) {
455 text.append (value_str);
456 }
457 else {
458 text.append (StaticStrings.SPEECH_CHARACTER);
459 text.append (value_str);
460 text.append (StaticStrings.SPEECH_CHARACTER);
461 }
462 value_str = null;
463 return text.toString ();
464 }
465
466 static private String searchtypeToString (Element command_element) {
467 if(command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
468 StringBuffer text = new StringBuffer (StaticStrings.SEARCHTYPE_STR);
469 text.append (StaticStrings.TAB_CHARACTER);
470 NodeList search_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
471 int search_elements_length = search_elements.getLength ();
472 for(int i = 0; i < search_elements_length; i++) {
473 Element search_element = (Element) search_elements.item (i);
474 text.append (search_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
475 text.append (StaticStrings.SPACE_CHARACTER);
476 }
477 return text.substring (0, text.length () - 1);
478 }
479 else {
480 return null;
481 }
482 }
483
484 static private String subcollectionToString (Element command_element) {
485 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_STR);
486 text.append (StaticStrings.SPACE_CHARACTER);
487 text.append (command_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
488 text.append (StaticStrings.SPACE_CHARACTER);
489 text.append (StaticStrings.TAB_CHARACTER);
490 text.append (StaticStrings.SPEECH_CHARACTER);
491 if(command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE).equals (StaticStrings.EXCLUDE_STR)) {
492 text.append (StaticStrings.EXCLAMATION_CHARACTER);
493 }
494 text.append (command_element.getAttribute (StaticStrings.CONTENT_ATTRIBUTE));
495 text.append (StaticStrings.SEPARATOR_CHARACTER);
496 text.append (XMLTools.getValue (command_element));
497 text.append (StaticStrings.SEPARATOR_CHARACTER);
498 String options_str = command_element.getAttribute (StaticStrings.OPTIONS_ATTRIBUTE);
499 if(options_str.length () > 0) {
500 text.append (options_str);
501 }
502 options_str = null;
503 text.append (StaticStrings.SPEECH_CHARACTER);
504 return text.toString ();
505 }
506
507 static private String subcollectionDefaultIndexToString (Element command_element) {
508 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_STR);
509 text.append (StaticStrings.TAB_CHARACTER);
510 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
511 int content_elements_length = content_elements.getLength ();
512 for(int j = 0; j < content_elements_length; j++) {
513 Element content_element = (Element) content_elements.item (j);
514 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
515 if(j < content_elements_length - 1) {
516 text.append (StaticStrings.COMMA_CHARACTER);
517 }
518 }
519 return text.toString ();
520 }
521
522 static private String subcollectionIndexesToString (Element command_element) {
523 StringBuffer text = new StringBuffer (StaticStrings.SUBCOLLECTION_INDEX_STR);
524 text.append (StaticStrings.TAB_CHARACTER);
525 // Retrieve all of the subcollection index partitions
526 NodeList subcollectionindex_elements = command_element.getElementsByTagName (StaticStrings.INDEX_ELEMENT);
527 int subcollectionindex_elements_length = subcollectionindex_elements.getLength ();
528 if(subcollectionindex_elements_length == 0) {
529 return null;
530 }
531 for(int j = 0; j < subcollectionindex_elements_length; j++) {
532 Element subcollectionindex_element = (Element) subcollectionindex_elements.item (j);
533 NodeList content_elements = subcollectionindex_element.getElementsByTagName (StaticStrings.CONTENT_ELEMENT);
534 int content_elements_length = content_elements.getLength ();
535 for(int k = 0; k < content_elements_length; k++) {
536 Element content_element = (Element) content_elements.item (k);
537 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
538 if(k < content_elements_length - 1) {
539 text.append (StaticStrings.COMMA_CHARACTER);
540 }
541 }
542 if(j < subcollectionindex_elements_length - 1) {
543 text.append (StaticStrings.SPACE_CHARACTER);
544 }
545 }
546 return text.toString ();
547 }
548
549 static private String supercollectionToString (Element command_element) {
550 NodeList content_elements = command_element.getElementsByTagName (StaticStrings.COLLECTION_ELEMENT);
551 int content_elements_length = content_elements.getLength ();
552 if(content_elements_length > 1) {
553 StringBuffer text = new StringBuffer (StaticStrings.SUPERCOLLECTION_STR);
554 text.append (StaticStrings.TAB_CHARACTER);
555 for(int j = 0; j < content_elements_length; j++) {
556 Element content_element = (Element) content_elements.item (j);
557 text.append (content_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
558 if(j < content_elements_length - 1) {
559 text.append (StaticStrings.SPACE_CHARACTER);
560 }
561 }
562 return text.toString ();
563 }
564 return null;
565 }
566
567 static private String unknownToString (Element command_element) {
568 return XMLTools.getValue (command_element);
569 }
570
571
572 /** Parse a collect.cfg into a DOM model representation.
573 * note we are ignoring 2.39 compatibility now. */
574 static public String parse (File collect_cfg_file, Document document) {
575 // hack for pre 2.71 compatibility - we need to add in a
576 // build type if there is not one there
577 boolean search_types_parsed = false;
578 boolean build_types_parsed = false;
579 try {
580 StringBuffer saved_collect_cfg_string_buffer = new StringBuffer ();
581
582 Element collect_cfg_element = document.getDocumentElement ();
583 // Read in the file one command at a time.
584 InputStream istream = new FileInputStream (collect_cfg_file);
585 Reader in_reader = new InputStreamReader (istream, CollectionConfiguration.ENCODING);
586 BufferedReader in = new BufferedReader (in_reader);
587 String command_str = null;
588 while((command_str = in.readLine ()) != null) {
589 saved_collect_cfg_string_buffer.append (command_str + "\n");
590
591 boolean append_element = true;
592 Element command_element = null;
593 // A command may be broken over several lines.
594 command_str = command_str.trim ();
595 boolean eof = false;
596 while(!eof && command_str.endsWith (StaticStrings.NEWLINE_CHARACTER)) {
597 String next_line = in.readLine ();
598 if(next_line != null) {
599 next_line = next_line.trim ();
600 if(next_line.length () > 0) {
601 // Remove the new line character
602 command_str = command_str.substring (0, command_str.lastIndexOf (StaticStrings.NEWLINE_CHARACTER));
603 // And append the next line, which due to the test above must be non-zero length
604 command_str = command_str + next_line;
605 }
606 next_line = null;
607 }
608 // If we've reached the end of the file theres nothing more we can do
609 else {
610 eof = true;
611 }
612 }
613 // If there is still a new line character, then we remove it and hope for the best
614 if(command_str.endsWith (StaticStrings.NEWLINE_CHARACTER)) {
615 command_str = command_str.substring (0, command_str.lastIndexOf (StaticStrings.NEWLINE_CHARACTER));
616 }
617 // Now we've either got a command to parse...
618 if(command_str.length () != 0) {
619 // Start trying to figure out what it is
620 //StringTokenizer tokenizer = new StringTokenizer(command_str);
621 // Instead of a standard string tokenizer I'm going to use the new version of CommandTokenizer, which is not only smart enough to correctly notice speech marks and correctly parse them out, but now also takes the input stream so it can rebuild tokens that stretch over several lines.
622 CommandTokenizer tokenizer = new CommandTokenizer (command_str, in);
623 String command_type = tokenizer.nextToken ().toLowerCase ();
624 // Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
625 if(command_element == null && command_type.equals (StaticStrings.CLASSIFY_STR)) {
626 command_element = parseClassify (command_str, document);
627 }
628 if(command_element == null && command_type.equals (StaticStrings.FORMAT_STR)) {
629 command_element = parseFormat (tokenizer, document); // Revised to handle multiple lines
630 }
631 if(command_element == null && (command_type.equals (StaticStrings.INDEX_STR) || command_type.equals (StaticStrings.COMMENTED_INDEXES_STR))) {
632 command_element = parseIndex (command_str, document);
633 }
634 if(command_element == null && (command_type.equals (StaticStrings.INDEX_DEFAULT_STR) || command_type.equals (StaticStrings.COMMENTED_INDEX_DEFAULT_STR))) {
635
636 command_element = parseIndexDefault (command_str, document);
637 }
638 if(command_element == null && command_type.equals (StaticStrings.SORT_STR)) {
639 command_element = parseSortfields (command_str, document);
640 }
641 if(command_element == null && command_type.equals (StaticStrings.SORT_DEFAULT_STR) ) {
642
643 command_element = parseSortfieldDefault (command_str, document);
644 }
645 if(command_element == null && command_type.equals (StaticStrings.LANGUAGES_STR)) {
646 command_element = parseLanguage (command_str, document);
647 }
648 if(command_element == null && command_type.equals (StaticStrings.LANGUAGE_DEFAULT_STR)) {
649 command_element = parseLanguageDefault (command_str, document);
650 }
651 if (command_element == null && command_type.equals (StaticStrings.LANGUAGE_METADATA_STR)) {
652 command_element = parseLanguageMetadata (command_str, document);
653 }
654 if(command_element == null && command_type.equals (StaticStrings.LEVELS_STR)) {
655 command_element = parseIndexOptions (command_str, document, StaticStrings.LEVELS_STR, true);
656 }
657 if (command_element == null && command_type.equals (StaticStrings.COMMENTED_LEVELS_STR)) {
658 command_element = parseIndexOptions (command_str, document, StaticStrings.LEVELS_STR, false);
659 }
660 if(command_element == null && command_type.equals (StaticStrings.LEVEL_DEFAULT_STR)) {
661 command_element = parseIndexOptionDefault (command_str, document, StaticStrings.LEVEL_DEFAULT_STR, true);
662 }
663 if(command_element == null && command_type.equals (StaticStrings.COMMENTED_LEVEL_DEFAULT_STR)) {
664 command_element = parseIndexOptionDefault (command_str, document, StaticStrings.LEVEL_DEFAULT_STR, false);
665 }
666 if (command_element == null && command_type.equals (StaticStrings.INDEXOPTIONS_STR)) {
667 command_element = parseIndexOptions (command_str, document, StaticStrings.INDEXOPTIONS_STR, true);
668 }
669 if (command_element == null && command_type.equals (StaticStrings.COMMENTED_INDEXOPTIONS_STR)) {
670 command_element = parseIndexOptions (command_str, document, StaticStrings.INDEXOPTIONS_STR, false);
671 }
672 if(command_element == null && command_type.equals (StaticStrings.COLLECTIONMETADATA_STR)) {
673 command_element = parseMetadata (tokenizer, document); // Revised to handle multiple lines
674 }
675 if(command_element == null && (command_type.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR) || command_type.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR) || command_type.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR) || command_type.equals (StaticStrings.BUILDTYPE_STR) || command_type.equals (StaticStrings.DATABASETYPE_STR))) {
676 command_element = parseMetadataSpecial (command_str, document);
677 // pre 2.71 hack
678 if (command_type.equals (StaticStrings.BUILDTYPE_STR)) {
679 build_types_parsed = true;
680 }
681 }
682 if(command_element == null && command_type.equals (StaticStrings.PLUGIN_STR)) {
683 command_element = parsePlugin (command_str, document);
684 }
685 // leave here for backwards compatibility
686 if(command_element == null && command_type.equals (StaticStrings.SEARCHTYPE_STR)) {
687 command_element = parseSearchType (command_str, document);
688 // pre 2.71 hack
689 search_types_parsed = true;
690
691 }
692 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_STR)) {
693 command_element = parseSubCollection (command_str, document);
694 }
695 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_STR)) {
696 command_element = parseSubCollectionDefaultIndex (command_str, document);
697 }
698 if(command_element == null && command_type.equals (StaticStrings.SUBCOLLECTION_INDEX_STR)) {
699 command_element = parseSubCollectionIndex (command_str, document);
700 }
701 if(command_element == null && (command_type.equals (StaticStrings.SUPERCOLLECTION_STR) || command_type.equals (StaticStrings.CCS_STR))) {
702 command_element = parseSuperCollection (command_str, document);
703 }
704 // Doesn't match any known type
705 command_type = null;
706 if(command_element == null) {
707 // No-one knows what to do with this command, so we create an Unknown command element
708 command_element = document.createElement (StaticStrings.UNKNOWN_ELEMENT);
709 XMLTools.setValue (command_element, command_str);
710 }
711 }
712 // Or an empty line to remember for later
713 else {
714 command_element = document.createElement (CollectionConfiguration.NEWLINE_ELEMENT);
715 }
716 // Now command element shouldn't be null so we append it to the collection config DOM, but only if we haven't been told not to add it
717 //if(append_element) {
718 collect_cfg_element.appendChild (command_element);
719 //}
720 }
721 if (!build_types_parsed) {
722 String buildtype_type = BuildTypeManager.BUILD_TYPE_MG;
723 if (search_types_parsed) {
724 buildtype_type = BuildTypeManager.BUILD_TYPE_MGPP;
725 }
726 Element command_element = parseMetadataSpecial (StaticStrings.BUILDTYPE_STR+" "+buildtype_type, document);
727 Node target_node = CollectionConfiguration.findInsertionPoint (command_element);
728 if(target_node != null) {
729 collect_cfg_element.insertBefore (command_element, target_node);
730 }
731 else {
732 collect_cfg_element.appendChild (command_element);
733 }
734
735 }
736 return saved_collect_cfg_string_buffer.toString();
737 }
738 catch(Exception exception) {
739 DebugStream.println ("Error in CollectionConfiguration.parse(java.io.File): " + exception);
740 DebugStream.printStackTrace (exception);
741 }
742
743 return null;
744 }
745
746
747 /** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix.
748 * @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string
749 * @return a HashMap containing the arguments parsed
750 */
751 static public HashMap parseArguments (CommandTokenizer tokenizer) {
752 HashMap arguments = new HashMap ();
753 String name = null;
754 String value = null;
755 while(tokenizer.hasMoreTokens () || name != null) {
756 // First we retrieve a name if we need one.
757 if(name == null) {
758 name = tokenizer.nextToken ();
759 }
760 // Now we attempt to retrieve a value
761 if(tokenizer.hasMoreTokens ()) {
762 value = tokenizer.nextToken ();
763 // Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop.
764 // The value is not a name if it contains a space character: it's a quoted value
765 if (value.startsWith(StaticStrings.MINUS_CHARACTER) && value.indexOf(StaticStrings.SPACE_CHARACTER) == -1) {
766 arguments.put (name, null);
767 name = value;
768 }
769 // Otherwise we have a typical name->value pair ready to go
770 else {
771 arguments.put (name, value);
772 name = null;
773 }
774 }
775 // Otherwise its a binary flag
776 else {
777 arguments.put (name, null);
778 name = null;
779 }
780 }
781 return arguments;
782 }
783
784 static private Element parseClassify (String command_str, Document document) {
785 Element command_element = null;
786 try {
787 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
788 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
789 if(tokenizer.countTokens () >= 2) { // Must support "classify Phind" (no args)
790 command_element = document.createElement (StaticStrings.CLASSIFY_ELEMENT);
791 // First token is classify
792 tokenizer.nextToken ();
793 // The next token is the classifier type
794 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, tokenizer.nextToken ());
795 // Now we parse out the remaining arguments into a hashmapping from name to value
796 HashMap arguments = parseArguments (tokenizer);
797 // Assign the arguments as Option elements
798 Iterator names = arguments.keySet ().iterator ();
799 while(names.hasNext ()) {
800 String name = (String) names.next ();
801 String value = (String) arguments.get (name); // Can be null
802 Element option_element = document.createElement (StaticStrings.OPTION_ELEMENT);
803 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name.substring (1));
804 if(value != null) {
805 // Remove any speech marks appended in strings containing whitespace
806 if(value.startsWith (StaticStrings.SPEECH_CHARACTER) && value.endsWith (StaticStrings.SPEECH_CHARACTER)) {
807 value = value.substring (1, value.length () - 1);
808 }
809 XMLTools.setValue (option_element, value);
810 }
811 option_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
812 command_element.appendChild (option_element);
813 option_element = null;
814 name = null;
815 value = null;
816 }
817 names = null;
818 arguments = null;
819 }
820 tokenizer = null;
821 }
822 catch(Exception error) {
823 }
824 return command_element;
825 }
826
827 static private Element parseFormat (CommandTokenizer tokenizer, Document document) {
828 Element command_element = null;
829 try {
830 command_element = document.createElement (StaticStrings.FORMAT_ELEMENT);
831 String name_str = tokenizer.nextToken ();
832 String value_str = tokenizer.nextToken ();
833 if(name_str != null && value_str != null) {
834 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
835 // If the value is true or false we add it as an attribute
836 if(value_str.equalsIgnoreCase (StaticStrings.TRUE_STR) || value_str.equalsIgnoreCase (StaticStrings.FALSE_STR)) {
837 command_element.setAttribute (StaticStrings.VALUE_ATTRIBUTE, value_str.toLowerCase ());
838 }
839 // Otherwise it gets added as a text node
840 else {
841 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
842 value_str = Codec.transform (value_str, Codec.GREENSTONE_TO_DOM);
843 XMLTools.setValue (command_element, value_str);
844 }
845 }
846 else {
847 command_element = null;
848 }
849 name_str = null;
850 value_str = null;
851 }
852 catch (Exception exception) {
853 DebugStream.printStackTrace (exception);
854 command_element = null;
855 }
856 return command_element;
857 }
858
859 static private Element parseIndex (String command_str, Document document) {
860 Element command_element = null;
861 try {
862 StringTokenizer tokenizer = new StringTokenizer (command_str);
863 String command = tokenizer.nextToken ();
864 command_element = document.createElement (StaticStrings.INDEXES_ELEMENT);
865 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (command.equals (StaticStrings.INDEX_STR) ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
866 command = null;
867 if(!tokenizer.hasMoreTokens ()) {
868
869 // there are no indexes
870 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.FALSE_STR);
871 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.FALSE_STR); // for now
872 tokenizer = null;
873 return command_element;
874 }
875
876 while(tokenizer.hasMoreTokens ()) {
877 Element index_element = document.createElement (StaticStrings.INDEX_ELEMENT);
878 String index_str = tokenizer.nextToken ();
879 // There are two types of index we have to consider. MG versions use "level:source,source" while MGPP versions use "source,source source"
880 if(index_str.indexOf (StaticStrings.COLON_CHARACTER) != -1) {
881 index_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE, index_str.substring (0, index_str.indexOf (StaticStrings.COLON_CHARACTER)));
882 index_str = index_str.substring (index_str.indexOf (StaticStrings.COLON_CHARACTER) + 1);
883 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.FALSE_STR);
884 }
885 else {
886 command_element.setAttribute (StaticStrings.MGPP_ATTRIBUTE, StaticStrings.TRUE_STR);
887 }
888 StringTokenizer content_tokenizer = new StringTokenizer (index_str, StaticStrings.COMMA_CHARACTER);
889 while(content_tokenizer.hasMoreTokens ()) {
890 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
891 String content_str = content_tokenizer.nextToken ();
892 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
893 if(content_str.indexOf (StaticStrings.NS_SEP) == -1) {
894 if(content_str.equals (StaticStrings.TEXT_STR) || content_str.equals (StaticStrings.ALLFIELDS_STR) || content_str.equals(StaticStrings.METADATA_STR)) {
895 // Our special strings are OK.
896 }
897 else {
898 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
899 }
900 }
901 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_str);
902 index_element.appendChild (content_element);
903 content_element = null;
904 }
905 content_tokenizer = null;
906 index_str = null;
907 command_element.appendChild (index_element);
908 index_element = null;
909 }
910 tokenizer = null;
911 }
912 catch (Exception exception) {
913 exception.printStackTrace ();
914 }
915 return command_element;
916 }
917
918 static private Element parseIndexDefault (String command_str, Document document) {
919 Element command_element = null;
920 try {
921 StringTokenizer tokenizer = new StringTokenizer (command_str);
922 if(tokenizer.countTokens () >= 2) {
923 command_element = document.createElement (StaticStrings.INDEX_DEFAULT_ELEMENT);
924 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken ()).equals (StaticStrings.INDEX_DEFAULT_STR) ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
925 String index_str = tokenizer.nextToken ();
926 String level="";
927 if (index_str.indexOf (StaticStrings.COLON_CHARACTER) !=-1) {
928 level = index_str.substring (0, index_str.indexOf (StaticStrings.COLON_CHARACTER));
929 }
930
931 command_element.setAttribute (StaticStrings.LEVEL_ATTRIBUTE,level);
932
933 String content_str = index_str;
934
935 if (index_str.indexOf (StaticStrings.COLON_CHARACTER) !=-1) {
936 content_str = index_str.substring (index_str.indexOf (StaticStrings.COLON_CHARACTER) + 1);
937 }
938
939 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
940 while(content_tokenizer.hasMoreTokens ()) {
941 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
942 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
943 command_element.appendChild (content_element);
944 content_element = null;
945 }
946 content_tokenizer = null;
947 content_str = null;
948 content_str = null;
949 index_str = null;
950 }
951 tokenizer = null;
952 }
953 catch (Exception exception) {
954 }
955 return command_element;
956 }
957
958 static private Element parseSortfields (String command_str, Document document) {
959 Element command_element = null;
960 try {
961 StringTokenizer tokenizer = new StringTokenizer (command_str);
962 String command = tokenizer.nextToken ();
963 command_element = document.createElement (StaticStrings.SORTS_ELEMENT);
964 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
965 command = null;
966 if(!tokenizer.hasMoreTokens ()) {
967
968 // there are no sortfields
969 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.FALSE_STR);
970 tokenizer = null;
971 return command_element;
972 }
973
974 while(tokenizer.hasMoreTokens ()) {
975 Element index_element = document.createElement (StaticStrings.SORT_ELEMENT);
976 String index_str = tokenizer.nextToken ();
977 StringTokenizer content_tokenizer = new StringTokenizer (index_str, StaticStrings.COMMA_CHARACTER);
978 while(content_tokenizer.hasMoreTokens ()) {
979 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
980 String content_str = content_tokenizer.nextToken ();
981 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
982 if(content_str.indexOf (StaticStrings.NS_SEP) == -1) {
983 if(content_str.equals (StaticStrings.RANK_STR) || content_str.equals (StaticStrings.NONE_STR) ) {
984 // Our special strings are OK.
985 }
986 else {
987 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
988 }
989 }
990 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_str);
991 index_element.appendChild (content_element);
992 content_element = null;
993 }
994 content_tokenizer = null;
995 index_str = null;
996 command_element.appendChild (index_element);
997 index_element = null;
998 }
999 tokenizer = null;
1000 }
1001 catch (Exception exception) {
1002 exception.printStackTrace ();
1003 }
1004 return command_element;
1005 }
1006
1007 static private Element parseSortfieldDefault (String command_str, Document document) {
1008 Element command_element = null;
1009 try {
1010 StringTokenizer tokenizer = new StringTokenizer (command_str);
1011 if(tokenizer.countTokens () >= 2) {
1012 command_element = document.createElement (StaticStrings.SORT_DEFAULT_ELEMENT);
1013 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken ()).equals (StaticStrings.SORT_DEFAULT_STR) ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
1014 String index_str = tokenizer.nextToken ();
1015
1016 String content_str = index_str;
1017
1018
1019 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
1020 while(content_tokenizer.hasMoreTokens ()) {
1021 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
1022 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
1023 command_element.appendChild (content_element);
1024 content_element = null;
1025 }
1026 content_tokenizer = null;
1027 content_str = null;
1028 content_str = null;
1029 index_str = null;
1030 }
1031 tokenizer = null;
1032 }
1033 catch (Exception exception) {
1034 }
1035 return command_element;
1036 }
1037
1038 static private Element parseLanguage (String command_str, Document document) {
1039 Element command_element = null;
1040 try {
1041 StringTokenizer tokenizer = new StringTokenizer (command_str);
1042 tokenizer.nextToken ();
1043 if(tokenizer.hasMoreTokens ()) {
1044 command_element = document.createElement (StaticStrings.LANGUAGES_ELEMENT);
1045 while(tokenizer.hasMoreTokens ()) {
1046 Element language_element = document.createElement (StaticStrings.LANGUAGE_ELEMENT);
1047 language_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
1048 command_element.appendChild (language_element);
1049 language_element = null;
1050 }
1051 }
1052 tokenizer = null;
1053 }
1054 catch (Exception exception) {
1055 }
1056 return command_element;
1057 }
1058
1059 static private Element parseLanguageDefault (String command_str, Document document) {
1060 Element command_element = null;
1061 try {
1062 StringTokenizer tokenizer = new StringTokenizer (command_str);
1063 if(tokenizer.countTokens () >= 2) {
1064 command_element = document.createElement (StaticStrings.LANGUAGE_DEFAULT_ELEMENT);
1065 tokenizer.nextToken ();
1066 String default_language_str = tokenizer.nextToken ();
1067 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, default_language_str);
1068 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1069 default_language_str = null;
1070 }
1071 tokenizer = null;
1072 }
1073 catch (Exception exception) {
1074 }
1075 return command_element;
1076 }
1077
1078 static private Element parseLanguageMetadata (String command_str, Document document) {
1079 Element command_element = null;
1080 try {
1081 StringTokenizer tokenizer = new StringTokenizer (command_str);
1082 if(tokenizer.countTokens () >= 2) {
1083 command_element = document.createElement (StaticStrings.LANGUAGE_METADATA_ELEMENT);
1084 tokenizer.nextToken ();
1085 String language_metadata_str = tokenizer.nextToken ();
1086 if (language_metadata_str.indexOf (StaticStrings.NS_SEP) == -1) {
1087 language_metadata_str = StaticStrings.EXTRACTED_NAMESPACE + language_metadata_str;
1088 }
1089 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, language_metadata_str);
1090 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1091 language_metadata_str = null;
1092 }
1093 tokenizer = null;
1094
1095 }
1096 catch (Exception exception) {
1097 }
1098 return command_element;
1099 }
1100
1101 static private Element parseIndexOptions (String command_str, Document document, String type, boolean assigned) {
1102 Element command_element = null;
1103 try {
1104 StringTokenizer tokenizer = new StringTokenizer (command_str);
1105 // First token is command type
1106 String command = tokenizer.nextToken ();
1107 if(tokenizer.hasMoreTokens ()) {
1108 command_element = document.createElement (StaticStrings.INDEXOPTIONS_ELEMENT);
1109 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE,type);
1110 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (assigned ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR));
1111 while(tokenizer.hasMoreTokens ()) {
1112 Element option_element = document.createElement (StaticStrings.INDEXOPTION_ELEMENT);
1113 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
1114 command_element.appendChild (option_element);
1115 option_element = null;
1116 }
1117 }
1118 command = null;
1119 }
1120 catch(Exception exception) {
1121 }
1122 return command_element;
1123 }
1124
1125 static private Element parseIndexOptionDefault (String command_str, Document document, String type, boolean assigned) {
1126 Element command_element = null;
1127 try {
1128 StringTokenizer tokenizer = new StringTokenizer (command_str);
1129 // First token is command type
1130 String command = tokenizer.nextToken ();
1131 if(tokenizer.hasMoreTokens ()) {
1132 command_element = document.createElement (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT);
1133 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, (assigned ? StaticStrings.TRUE_STR : StaticStrings.FALSE_STR)); // is it commented out or not?
1134 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, type);
1135 command_element.setAttribute (StaticStrings.VALUE_ATTRIBUTE, tokenizer.nextToken ());
1136 }
1137
1138 tokenizer = null;
1139 }
1140 catch (Exception exception) {
1141 }
1142 return command_element;
1143 }
1144
1145 static private Element parseMetadata (CommandTokenizer tokenizer, Document document) {
1146 Element command_element = null;
1147 boolean is_search_meta = false;
1148 try {
1149
1150 String name_str = tokenizer.nextToken ();
1151 if (name_str.startsWith(StaticStrings.DOT_CHARACTER)) {
1152 is_search_meta = true;
1153 name_str = name_str.substring(1); // remove the dot
1154 }
1155 String value_str = tokenizer.nextToken ();
1156 if(name_str != null && value_str != null) {
1157 String language_str = Configuration.getLanguage ();
1158 // Check if the value string is actually a language string
1159 if(value_str.startsWith (StaticStrings.LBRACKET_CHARACTER) && value_str.endsWith (StaticStrings.RBRACKET_CHARACTER)) {
1160 language_str = value_str.substring (value_str.indexOf (StaticStrings.LANGUAGE_ARGUMENT) + 2, value_str.length () - 1);
1161 value_str = tokenizer.nextToken ();
1162 }
1163 if(value_str != null) {
1164 // Ready the value str (which can contain all sorts of funky characters) for writing as a DOM value
1165 if (is_search_meta) {
1166 command_element = document.createElement (StaticStrings.SEARCHMETADATA_ELEMENT);
1167 command_element.setAttribute(StaticStrings.TYPE_ATTRIBUTE, SearchMeta.TYPE_SEARCH);
1168 } else {
1169 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_ELEMENT);
1170 }
1171 value_str = Codec.transform (value_str, Codec.GREENSTONE_TO_DOM);
1172 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
1173 command_element.setAttribute (StaticStrings.LANGUAGE_ATTRIBUTE, language_str);
1174 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1175 XMLTools.setValue (command_element, value_str);
1176 }
1177 else {
1178 command_element = null;
1179 }
1180 language_str = null;
1181 }
1182 else {
1183 command_element = null;
1184 }
1185 name_str = null;
1186 value_str = null;
1187 }
1188 catch (Exception exception) {
1189 DebugStream.printStackTrace (exception);
1190 command_element = null;
1191 }
1192 return command_element;
1193 }
1194
1195 static private Element parseMetadataSpecial (String command_str, Document document) {
1196 Element command_element = null;
1197 try {
1198 StringTokenizer tokenizer = new StringTokenizer (command_str);
1199 if(tokenizer.countTokens () >= 2) {
1200 String name_str = tokenizer.nextToken ();
1201 String value_str = tokenizer.nextToken ();
1202 if (name_str.equals (StaticStrings.COLLECTIONMETADATA_CREATOR_STR)) {
1203 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT);
1204 }
1205 else if(name_str.equals (StaticStrings.COLLECTIONMETADATA_MAINTAINER_STR)) {
1206 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT);
1207 }
1208 else if(name_str.equals (StaticStrings.COLLECTIONMETADATA_PUBLIC_STR)) {
1209 command_element = document.createElement (StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT);
1210 }
1211 else if (name_str.equals (StaticStrings.BUILDTYPE_STR)) {
1212 command_element = document.createElement (StaticStrings.BUILDTYPE_ELEMENT);
1213 }
1214 else if (name_str.equals (StaticStrings.DATABASETYPE_STR)) {
1215 command_element = document.createElement (StaticStrings.DATABASETYPE_ELEMENT);
1216 }
1217 if(command_element != null) {
1218 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name_str);
1219 command_element.setAttribute (StaticStrings.LANGUAGE_ATTRIBUTE, StaticStrings.ENGLISH_LANGUAGE_STR);
1220 command_element.setAttribute (StaticStrings.SPECIAL_ATTRIBUTE, StaticStrings.TRUE_STR);
1221 command_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1222 if(value_str.startsWith (StaticStrings.SPEECH_CHARACTER) && value_str.endsWith (StaticStrings.SPEECH_CHARACTER)) {
1223 value_str = value_str.substring (1, value_str.length () - 1);
1224 }
1225 XMLTools.setValue (command_element, value_str);
1226 }
1227 value_str = null;
1228 name_str = null;
1229 }
1230 tokenizer = null;
1231 }
1232 catch (Exception exception) {
1233 }
1234 return command_element;
1235 }
1236
1237 static private Element parsePlugin (String command_str, Document document) {
1238 Element command_element = null;
1239 try {
1240 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
1241 // Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1242 if(tokenizer.countTokens () >= 2) {
1243 command_element = document.createElement (StaticStrings.PLUGIN_ELEMENT);
1244 // First token is plugin
1245 tokenizer.nextToken ();
1246 // The next token is the type
1247 String type = tokenizer.nextToken ();
1248 type = Utility.ensureNewPluginName(type);
1249 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, type);
1250 // Now we parse out the remaining arguments into a hashmapping from name to value
1251 HashMap arguments = parseArguments (tokenizer);
1252 // also watch out for the deprecated -use_metadata_files option to RecPlug and remove it
1253 Iterator names = arguments.keySet ().iterator ();
1254 while(names.hasNext ()) {
1255 String name = (String) names.next ();
1256 String value = (String) arguments.get (name); // Can be null
1257
1258 if(type.equals (StaticStrings.RECPLUG_STR) && name.substring (1).equals (StaticStrings.USE_METADATA_FILES_ARGUMENT)) {
1259 continue; // ignore this option
1260 }
1261 Element option_element = document.createElement (StaticStrings.OPTION_ELEMENT);
1262 option_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, name.substring (1));
1263 option_element.setAttribute (StaticStrings.ASSIGNED_ATTRIBUTE, StaticStrings.TRUE_STR);
1264 if(value != null) {
1265 // Remove any speech marks appended in strings containing whitespace
1266 if(value.startsWith (StaticStrings.SPEECH_CHARACTER) && value.endsWith (StaticStrings.SPEECH_CHARACTER)) {
1267 value = value.substring (1, value.length () - 1);
1268 }
1269 XMLTools.setValue (option_element, value);
1270 }
1271 command_element.appendChild (option_element);
1272 option_element = null;
1273 name = null;
1274 value = null;
1275 }
1276
1277 type = null;
1278 names = null;
1279 arguments = null;
1280 }
1281 tokenizer = null;
1282 }
1283 catch(Exception exception) {
1284 // This catch clause had been left empty. If this is deliberate then
1285 // we should have a comment here explaining why there is no need to
1286 // print anything out. Am assuming this is mistake for now, and
1287 // have added in a call to printStackTrace()
1288 System.err.println("Malformed plugin statement");
1289 exception.printStackTrace();
1290 }
1291 return command_element;
1292 }
1293
1294 /* search types are now handled as formats - leave this here to convert in case we have an old config file */
1295 static private Element parseSearchType (String command_str, Document document) {
1296 Element command_element = null;
1297 try {
1298 StringTokenizer tokenizer = new StringTokenizer (command_str);
1299 // First token is command type (searchtype)
1300 tokenizer.nextToken ();
1301 if(tokenizer.hasMoreTokens ()) {
1302 command_element = document.createElement (StaticStrings.FORMAT_ELEMENT);
1303 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, "SearchTypes");
1304 String value = tokenizer.nextToken ();
1305 while(tokenizer.hasMoreTokens ()) {
1306 value += ","+tokenizer.nextToken ();
1307 }
1308 value = Codec.transform (value, Codec.GREENSTONE_TO_DOM);
1309 XMLTools.setValue (command_element, value);
1310 }
1311 }
1312 catch(Exception exception) {
1313 }
1314 return command_element;
1315 }
1316
1317 static private Element parseSubCollection (String command_str, Document document) {
1318 Element command_element = null;
1319 try {
1320 CommandTokenizer tokenizer = new CommandTokenizer (command_str);
1321 if(tokenizer.countTokens () >= 3) {
1322 command_element = document.createElement (StaticStrings.SUBCOLLECTION_ELEMENT);
1323 // First token is command type
1324 tokenizer.nextToken ();
1325 // Then subcollection identifier
1326 command_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
1327 // Then finally the pattern used to build the subcollection partition
1328 String full_pattern_str = tokenizer.nextToken ();
1329 // Set inclusion/exclusion flag and remove any exclamation mark
1330 boolean exclusion = full_pattern_str.startsWith (StaticStrings.EXCLAMATION_CHARACTER);
1331 if (exclusion) {
1332 full_pattern_str = full_pattern_str.substring (1, full_pattern_str.length ());
1333 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, StaticStrings.EXCLUDE_STR);
1334 }
1335 else {
1336 command_element.setAttribute (StaticStrings.TYPE_ATTRIBUTE, StaticStrings.INCLUDE_STR);
1337 }
1338
1339 // Let's make sure it is a valid Greenstone configuration line
1340 String[] results = full_pattern_str.split("\\" + StaticStrings.SEPARATOR_CHARACTER, 3);
1341
1342 if (results.length >= 2) {
1343 String content_str = results[0];
1344 // Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1345 if (!content_str.equals (StaticStrings.FILENAME_STR) && content_str.indexOf (StaticStrings.NS_SEP) == -1) {
1346 content_str = StaticStrings.EXTRACTED_NAMESPACE + content_str;
1347 }
1348 command_element.setAttribute (StaticStrings.CONTENT_ATTRIBUTE, content_str);
1349 XMLTools.setValue (command_element, results[1]);
1350 if (results.length >= 3) {
1351 command_element.setAttribute (StaticStrings.OPTIONS_ATTRIBUTE, results[2]);
1352 }
1353 }
1354 }
1355 }
1356 catch(Exception exception) {
1357 exception.printStackTrace ();
1358 }
1359 return command_element;
1360 }
1361
1362 static private Element parseSubCollectionDefaultIndex (String command_str, Document document) {
1363 Element command_element = null;
1364 try {
1365 StringTokenizer tokenizer = new StringTokenizer (command_str);
1366 if(tokenizer.countTokens () == 2) {
1367 command_element = document.createElement (StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT);
1368 tokenizer.nextToken ();
1369 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1370 String content_str = tokenizer.nextToken ();
1371 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
1372 while(content_tokenizer.hasMoreTokens ()) {
1373 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
1374 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
1375 command_element.appendChild (content_element);
1376 content_element = null;
1377 }
1378 content_tokenizer = null;
1379 content_str = null;
1380 }
1381 tokenizer = null;
1382 }
1383 catch(Exception exception) {
1384 }
1385 return command_element;
1386 }
1387
1388 static private Element parseSubCollectionIndex (String command_str, Document document) {
1389 Element command_element = null;
1390 try {
1391 StringTokenizer tokenizer = new StringTokenizer (command_str);
1392 tokenizer.nextToken ();
1393 if(tokenizer.hasMoreTokens ()) {
1394 command_element = document.createElement (StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT);
1395 }
1396 while(tokenizer.hasMoreTokens ()) {
1397 Element subcollectionindex_element = document.createElement (StaticStrings.INDEX_ELEMENT);
1398 //command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1399 String content_str = tokenizer.nextToken ();
1400 StringTokenizer content_tokenizer = new StringTokenizer (content_str, StaticStrings.COMMA_CHARACTER);
1401 while(content_tokenizer.hasMoreTokens ()) {
1402 Element content_element = document.createElement (StaticStrings.CONTENT_ELEMENT);
1403 content_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, content_tokenizer.nextToken ());
1404 subcollectionindex_element.appendChild (content_element);
1405 content_element = null;
1406 }
1407 content_tokenizer = null;
1408 content_str = null;
1409 command_element.appendChild (subcollectionindex_element);
1410 subcollectionindex_element = null;
1411 }
1412 tokenizer = null;
1413 }
1414 catch (Exception exception) {
1415 }
1416 return command_element;
1417 }
1418
1419 static private Element parseSuperCollection (String command_str, Document document) {
1420 Element command_element = null;
1421 try {
1422 StringTokenizer tokenizer = new StringTokenizer (command_str);
1423 if(tokenizer.countTokens () >= 3) {
1424 command_element = document.createElement (StaticStrings.SUPERCOLLECTION_ELEMENT);
1425 tokenizer.nextToken ();
1426 while(tokenizer.hasMoreTokens ()) {
1427 Element collection_element = document.createElement (StaticStrings.COLLECTION_ELEMENT);
1428 collection_element.setAttribute (StaticStrings.NAME_ATTRIBUTE, tokenizer.nextToken ());
1429 command_element.appendChild (collection_element);
1430 collection_element = null;
1431 }
1432 }
1433 tokenizer = null;
1434 }
1435 catch(Exception exception) {
1436 }
1437 return command_element;
1438 }
1439
1440 static private String pluginToString (Element command_element) {
1441 if(command_element.getAttribute (StaticStrings.SEPARATOR_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
1442 return "";
1443 }
1444 StringBuffer text = new StringBuffer (StaticStrings.PLUGIN_STR);
1445 text.append (StaticStrings.TAB_CHARACTER);
1446 text.append (command_element.getAttribute (StaticStrings.TYPE_ATTRIBUTE));
1447 // Retrieve, and output, the arguments
1448 NodeList option_elements = command_element.getElementsByTagName (StaticStrings.OPTION_ELEMENT);
1449 int option_elements_length = option_elements.getLength ();
1450 if(option_elements_length > 0) {
1451 for(int j = 0; j < option_elements_length; j++) {
1452 Element option_element = (Element) option_elements.item (j);
1453 if(option_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.TRUE_STR)) {
1454 text.append (StaticStrings.SPACE_CHARACTER);
1455 text.append (StaticStrings.MINUS_CHARACTER);
1456 text.append (option_element.getAttribute (StaticStrings.NAME_ATTRIBUTE));
1457 String value_str = XMLTools.getValue (option_element);
1458 if (value_str.length () > 0) {
1459 text.append (StaticStrings.SPACE_CHARACTER);
1460 if(value_str.indexOf (StaticStrings.SPACE_CHARACTER) != -1) {
1461 // enclose in quotes
1462 text.append(StaticStrings.SPEECH_CHARACTER);
1463 text.append(value_str);
1464 text.append(StaticStrings.SPEECH_CHARACTER);
1465 } else {
1466
1467 text.append(value_str);
1468 }
1469 }
1470
1471 value_str = null;
1472 }
1473 option_element = null;
1474 }
1475 }
1476 option_elements = null;
1477
1478 return text.toString ();
1479 }
1480
1481 static public String generateStringVersion(Document document) {
1482
1483 StringBuffer collect_cfg_string_buffer = new StringBuffer ();
1484 NodeList command_elements = document.getDocumentElement ().getChildNodes ();
1485 boolean just_wrote_blank_line = false; // Prevent two or more blank lines in a row
1486 for (int i = 0; i < command_elements.getLength (); i++) {
1487 Node command_node = command_elements.item (i);
1488 if (!(command_node instanceof Element)) {
1489 // We're only interested in Elements
1490 continue;
1491 }
1492 Element command_element = (Element) command_node;
1493
1494 // Handle NewLine elements (blank lines)
1495 if (command_element.getNodeName ().equals (CollectionConfiguration.NEWLINE_ELEMENT) && !just_wrote_blank_line) {
1496 collect_cfg_string_buffer.append ("\n");
1497 just_wrote_blank_line = true;
1498 }
1499
1500 // Anything else we write to file, but only if it has been assigned, except for index and level commands
1501 // (which just get commented out if unassigned -- a side effect of MG & MGPP compatibility)
1502 else if (!command_element.getAttribute (StaticStrings.ASSIGNED_ATTRIBUTE).equals (StaticStrings.FALSE_STR) || command_element.getNodeName ().equals (StaticStrings.INDEXES_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEX_DEFAULT_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEXOPTIONS_ELEMENT) || command_element.getNodeName ().equals (StaticStrings.INDEXOPTION_DEFAULT_ELEMENT)) {
1503 String command = toString(command_element);
1504
1505 if (command != null && command.length ()> 0 ) {
1506 collect_cfg_string_buffer.append (command + "\n");
1507 just_wrote_blank_line = false;
1508 }
1509 }
1510 }
1511
1512 return collect_cfg_string_buffer.toString ();
1513 }
1514
1515
1516}
Note: See TracBrowser for help on using the repository browser.