Context Navigation

source: trunk/gli/src/org/greenstone/gatherer/cdm/CollectionConfiguration.java@ 5164

Last change on this file since 5164 was 5164, checked in by jmt12, 21 years ago
I can't remember what has changed, but I bet it was for the better
Property svn:keywords set to `Author Date Id Revision`
File size: 64.6 KB

Line
1	/**
2	*#########################################################################
3	*
4	* A component of the Gatherer application, part of the Greenstone digital
5	* library suite from the New Zealand Digital Library Project at the
6	* University of Waikato, New Zealand.
7	*
8	* Author: John Thompson, Greenstone Digital Library, University of Waikato
9	*
10	* Copyright (C) 1999 New Zealand Digital Library Project
11	*
12	* This program is free software; you can redistribute it and/or modify
13	* it under the terms of the GNU General Public License as published by
14	* the Free Software Foundation; either version 2 of the License, or
15	* (at your option) any later version.
16	*
17	* This program is distributed in the hope that it will be useful,
18	* but WITHOUT ANY WARRANTY; without even the implied warranty of
19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20	* GNU General Public License for more details.
21	*
22	* You should have received a copy of the GNU General Public License
23	* along with this program; if not, write to the Free Software
24	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25	*########################################################################
26	*/
27	package org.greenstone.gatherer.cdm;
28	import java.awt.*;
29	import java.awt.event.*;
30	import java.io.*;
31	import java.util.*;
32	import javax.swing.*;
33	import org.greenstone.gatherer.Gatherer;
34	import org.greenstone.gatherer.cdm.CommandTokenizer;
35	import org.greenstone.gatherer.msm.MSMUtils;
36	import org.greenstone.gatherer.util.DOMTree;
37	import org.greenstone.gatherer.util.StaticStrings;
38	import org.greenstone.gatherer.util.Utility;
39	import org.w3c.dom.*;
40	/** This class provides either access to a pseudo-G3 document, or parses a collect.cfg file in such a way as to provide an xml-type view of its content. This latter version is useful as it allows the manipulation and free form editing of a legacy collect.cfg file while still allowing the various CDM data managers to base themselves directly on this model (whereas they used to be independent ListModels which clobbered the ordering of unparsed commands).
41	* @author John Thompson, Greenstone Digital Library, University of Waikato
42	* @version 2.3d
43	*/
44	public class CollectionConfiguration
45	extends StaticStrings {
46
47	static public Document document;
48
49	static public void main(String[] args) {
50	if(args.length >= 1) {
51	File file = new File(args[0]);
52	CollectionConfiguration collect_cfg = new CollectionConfiguration(file);
53	collect_cfg.save(true);
54	collect_cfg.save(false);
55	collect_cfg = null;
56	}
57	else {
58	System.out.println("Usage: CollectionConfiguration <filename>");
59	}
60	}
61
62	/** Find the best insertion position for the given DOM Element. This should try to match command tag, and if found should then try to group by name or type (eg CollectionMeta), or append to end is no such grouping exists (eg PlugIns). Failing a command match it will check against the command order for the best insertion location.
63	* @param element the command Element to be inserted
64	* @return the Element which the given command should be inserted before, or null to append to end of list
65	*/
66	static public Node findInsertionPoint(Element target_element) {
67	///ystem.err.println("Find insertion point: " + target_element.getNodeName());
68	String target_element_name = target_element.getNodeName();
69	Element document_element = document.getDocumentElement();
70	// Try to find commands with the same tag.
71	NodeList matching_elements = document_element.getElementsByTagName(target_element_name);
72	// If we found matching elements, then we have our most likely insertion location, so check within for groupings
73	if(matching_elements.getLength() != 0) {
74	///ystem.err.println("Found matching elements.");
75	// Only CollectionMeta are grouped.
76	if(target_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
77	///ystem.err.println("Dealing with collection metadata");
78	// Special case: CollectionMeta can be added at either the start or end of a collection configuration file. However the start position is reserved for special metadata, so if no non-special metadata can be found we must append to the end.
79	// So if the command to be added is special add it immediately after any other special command
80	if(target_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
81	int index = 0;
82	Element matched_element = (Element) matching_elements.item(index);
83	Element sibling_element = (Element) matched_element.getNextSibling();
84	while(sibling_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
85	index++;
86	matched_element = (Element) matching_elements.item(index);
87	sibling_element = (Element) matched_element.getNextSibling();
88	}
89	if(sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
90	Element newline_element = document.createElement(NEWLINE_ELEMENT);
91	document_element.insertBefore(newline_element, sibling_element);
92	}
93	return sibling_element;
94	}
95	// Otherwise try to find a matching 'name' and add after the last one in that group.
96	else {
97	int index = 0;
98	target_element_name = target_element.getAttribute(NAME_ATTRIBUTE);
99	boolean found = false;
100	// Skip all of the special metadata
101	Element matched_element = (Element) matching_elements.item(index);
102	while(matched_element.getAttribute(SPECIAL_ATTRIBUTE).equals(TRUE_STR)) {
103	index++;
104	matched_element = (Element) matching_elements.item(index);
105	}
106	// Begin search
107	while(!found && matched_element != null) {
108	if(matched_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
109	found = true;
110	}
111	else {
112	index++;
113	matched_element = (Element) matching_elements.item(index);
114	}
115	}
116	// If we found a match, we need to continue checking until we find the last name match.
117	if(found) {
118	index++;
119	Element previous_sibling = matched_element;
120	Element sibling_element = (Element) matching_elements.item(index);
121	while(sibling_element != null && sibling_element.getAttribute(NAME_ATTRIBUTE).equals(target_element_name)) {
122	previous_sibling = sibling_element;
123	index++;
124	sibling_element = (Element) matching_elements.item(index);
125	}
126	// Previous sibling now holds the command immediately before where we want to add, so find its next sibling and add to that. In this one case we can ignore new lines!
127	return previous_sibling.getNextSibling();
128	}
129	// If not found we just add after last metadata element
130	else {
131	Element last_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
132	return last_element.getNextSibling();
133	}
134	}
135
136	}
137	else {
138	///ystem.err.println("Not dealing with collection meta.");
139	Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
140	// One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
141	Node sibling_element = matched_element.getNextSibling();
142	if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
143	Element newline_element = document.createElement(NEWLINE_ELEMENT);
144	document_element.insertBefore(newline_element, sibling_element);
145	}
146	return sibling_element; // Note that this may be null
147	}
148	}
149	///ystem.err.println("No matching elements found.");
150	// Locate where this command is in the ordering
151	int command_index = -1;
152	for(int i = 0; command_index == -1 && i < COMMAND_ORDER.length; i++) {
153	if(COMMAND_ORDER[i].equals(target_element_name)) {
154	command_index = i;
155	}
156	}
157	///ystem.err.println("Command index is: " + command_index);
158	// Now move forward, checking for existing elements in each of the preceeding command orders.
159	int preceeding_index = command_index - 1;
160	///ystem.err.println("Searching before the target command.");
161	while(preceeding_index >= 0) {
162	matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[preceeding_index]);
163	// If we've found a match
164	if(matching_elements.getLength() > 0) {
165	// We add after the last element
166	Element matched_element = (Element) matching_elements.item(matching_elements.getLength() - 1);
167	// One final quick test. If the matched element is immediately followed by a NewLine command, then we insert another NewLine after the matched command, then return the NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
168	Node sibling_element = matched_element.getNextSibling();
169	if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
170	Element newline_element = document.createElement(NEWLINE_ELEMENT);
171	document_element.insertBefore(newline_element, sibling_element);
172	}
173	return sibling_element; // Note that this may be null
174	}
175	preceeding_index--;
176	}
177	// If all that fails, we now move backwards through the commands
178	int susceeding_index = command_index + 1;
179	///ystem.err.println("Searching after the target command.");
180	while(susceeding_index < COMMAND_ORDER.length) {
181	matching_elements = document_element.getElementsByTagName(COMMAND_ORDER[susceeding_index]);
182	// If we've found a match
183	if(matching_elements.getLength() > 0) {
184	// We add before the first element
185	Element matched_element = (Element) matching_elements.item(0);
186	// One final quick test. If the matched element is immediately preceeded by a NewLine command, then we insert another NewLine before the matched command, then return this new NewLine instead (thus the about to be inserted command will be placed between the two NewLines)
187	Node sibling_element = matched_element.getPreviousSibling();
188	if(sibling_element != null && sibling_element.getNodeName().equals(NEWLINE_ELEMENT)) {
189	Element newline_element = document.createElement(NEWLINE_ELEMENT);
190	document_element.insertBefore(newline_element, sibling_element);
191	}
192	return sibling_element; // Note that this may be null
193	}
194	susceeding_index++;
195	}
196	// Well. Apparently there are no other commands in this collection configuration. So append away...
197	return null;
198	}
199
200	static public String toString(Element command_element, boolean show_extracted_namespace) {
201	String command_element_name = command_element.getNodeName();
202	if(command_element_name.equals(CLASSIFY_ELEMENT)) {
203	return self.classifyToString(command_element, show_extracted_namespace);
204	}
205	else if(command_element_name.equals(FORMAT_ELEMENT)) {
206	return self.formatToString(command_element, show_extracted_namespace);
207	}
208	else if(command_element_name.equals(INDEXES_ELEMENT)) {
209	return self.indexesToString(command_element, show_extracted_namespace);
210	}
211	else if(command_element_name.equals(INDEX_DEFAULT_ELEMENT)) {
212	return self.indexDefaultToString(command_element, show_extracted_namespace);
213	}
214	else if(command_element_name.equals(LANGUAGES_ELEMENT)) {
215	return self.languagesToString(command_element);
216	}
217	else if(command_element_name.equals(LANGUAGE_DEFAULT_ELEMENT)) {
218	return self.languageDefaultToString(command_element);
219	}
220	else if(command_element_name.equals(LEVELS_ELEMENT)) {
221	return self.levelsToString(command_element);
222	}
223	else if(command_element_name.equals(COLLECTIONMETADATA_ELEMENT)) {
224	return self.metadataToString(command_element);
225	}
226	else if(command_element_name.equals(COLLECTIONMETADATA_CREATOR_ELEMENT)) {
227	return self.metadataToString(command_element);
228	}
229	else if(command_element_name.equals(COLLECTIONMETADATA_MAINTAINER_ELEMENT)) {
230	return self.metadataToString(command_element);
231	}
232	else if(command_element_name.equals(COLLECTIONMETADATA_PUBLIC_ELEMENT)) {
233	return self.metadataToString(command_element);
234	}
235	else if(command_element_name.equals(COLLECTIONMETADATA_BETA_ELEMENT)) {
236	return self.metadataToString(command_element);
237	}
238	else if(command_element_name.equals(PLUGIN_ELEMENT)) {
239	return self.pluginToString(command_element, show_extracted_namespace);
240	}
241	else if(command_element_name.equals(SEARCHTYPE_ELEMENT)) {
242	return self.searchtypeToString(command_element);
243	}
244	else if(command_element_name.equals(SUBCOLLECTION_ELEMENT)) {
245	return self.subcollectionToString(command_element, show_extracted_namespace);
246	}
247	else if(command_element_name.equals(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT)) {
248	return self.subcollectionDefaultIndexToString(command_element);
249	}
250	else if(command_element_name.equals(SUBCOLLECTION_INDEXES_ELEMENT)) {
251	return self.subcollectionIndexesToString(command_element);
252	}
253	else if(command_element_name.equals(SUPERCOLLECTION_ELEMENT)) {
254	return self.supercollectionToString(command_element);
255	}
256	else if(command_element_name.equals(UNKNOWN_ELEMENT)) {
257	return self.unknownToString(command_element);
258	}
259	return "";
260	}
261
262	/** Parses arguments from a tokenizer and returns a HashMap of mappings. The tricky bit here is that not all entries in the HashMap are name->value pairs, as some arguments are boolean and are turned on by their presence. Arguments are denoted by a '-' prefix.
263	* @param tokenizer a CommandTokenizer based on the unconsumed portion of a command string
264	* @return a HashMap containing the arguments parsed
265	*/
266	static public HashMap parseArguments(CommandTokenizer tokenizer) {
267	HashMap arguments = new HashMap();
268	String name = null;
269	String value = null;
270	while(tokenizer.hasMoreTokens() \|\| name != null) {
271	// First we retrieve a name if we need one.
272	if(name == null) {
273	name = tokenizer.nextToken();
274	}
275	// Now we attempt to retrieve a value
276	if(tokenizer.hasMoreTokens()) {
277	value = tokenizer.nextToken();
278	// Test if the value is actually a name, and if so add the name by itself, then put value into name so that it is parsed correctly during the next loop.
279	if(value.startsWith(StaticStrings.MINUS_CHARACTER)) {
280	arguments.put(name, null);
281	name = value;
282	}
283	// Otherwise we have a typical name->value pair ready to go
284	else {
285	arguments.put(name, value);
286	name = null;
287	}
288	}
289	// Otherwise its a binary flag
290	else {
291	arguments.put(name, null);
292	name = null;
293	}
294	}
295	return arguments;
296	}
297
298	static private ArrayList known_metadata;
299
300	static private CollectionConfiguration self;
301
302	static final private String EXTRACTED_PREFIX = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP;
303	/** Gives the preferred ordering of commands */
304	static final private String[] COMMAND_ORDER = {StaticStrings.COLLECTIONMETADATA_CREATOR_ELEMENT, StaticStrings.COLLECTIONMETADATA_MAINTAINER_ELEMENT, StaticStrings.COLLECTIONMETADATA_PUBLIC_ELEMENT, StaticStrings.COLLECTIONMETADATA_BETA_ELEMENT, StaticStrings.SEARCHTYPE_ELEMENT, StaticStrings.PLUGIN_ELEMENT, StaticStrings.INDEXES_ELEMENT, StaticStrings.LEVELS_ELEMENT, StaticStrings.INDEX_DEFAULT_ELEMENT, StaticStrings.LANGUAGES_ELEMENT, StaticStrings.LANGUAGE_DEFAULT_ELEMENT, StaticStrings.SUBCOLLECTION_ELEMENT, StaticStrings.SUBCOLLECTION_INDEXES_ELEMENT, StaticStrings.SUBCOLLECTION_DEFAULT_INDEX_ELEMENT, StaticStrings.SUPERCOLLECTION_ELEMENT, StaticStrings.CLASSIFY_ELEMENT, StaticStrings.FORMAT_ELEMENT, StaticStrings.COLLECTIONMETADATA_ELEMENT};
305
306	/ ********************** Public Data Members *************************/
307
308	/ ********************** Private Data Members *************************/
309
310	private File collect_config_file;
311
312	/* ********************** Public Methods ************************* */
313
314	public CollectionConfiguration(File collect_config_file) {
315	this.self = this;
316	this.collect_config_file = collect_config_file;
317	// If collect_cfg is xml we can load it straight away
318	String collect_config_name = collect_config_file.getName();
319	if(collect_config_name.equals(COLLECTCONFIGURATION_XML)) {
320	// Parse with Utility but don't use class loader
321	document = Utility.parse(collect_config_file, false);
322	}
323	// Otherwise if this is a legacy collect.cfg file then read in the template and send to magic parser
324	else if(collect_config_name.equals(COLLECT_CFG)) {
325	document = Utility.parse(PSEUDO_COLLECTCONFIGURATION_XML, true);
326	parse(collect_config_file);
327	}
328	}
329
330	/** This debug facility shows the currently loaded collect.cfg or CollectConfig.xml file as a DOM tree. */
331	public void display() {
332	JDialog dialog = new JDialog(Gatherer.g_man, "Collection Configuration", false);
333	dialog.setSize(400,400);
334	JPanel content_pane = (JPanel) dialog.getContentPane();
335	final DOMTree tree = new DOMTree(document);
336	JButton refresh_button = new JButton("Refresh Tree");
337	refresh_button.addActionListener(new ActionListener() {
338	public void actionPerformed(ActionEvent event) {
339	tree.setDocument(document);
340	}
341	});
342	content_pane.setBorder(BorderFactory.createEmptyBorder(5,5,5,5));
343	content_pane.setLayout(new BorderLayout());
344	content_pane.add(new JScrollPane(tree), BorderLayout.CENTER);
345	content_pane.add(refresh_button, BorderLayout.SOUTH);
346	dialog.show();
347	}
348
349	public Element getBeta() {
350	Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_BETA_ELEMENT, null, null);
351	element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_BETA_STR);
352	element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
353	return element;
354	}
355
356	public Element getCreator() {
357	Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_CREATOR_ELEMENT, null, null);
358	element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_CREATOR_STR);
359	element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
360	return element;
361	}
362
363	public Element getDocumentElement() {
364	return document.getDocumentElement();
365	}
366
367	public File getFile() {
368	return collect_config_file;
369	}
370
371	/** Retrieve or create the languages Element. */
372	public Element getLanguages() {
373	return getOrCreateElementByTagName(LANGUAGES_ELEMENT, null, null);
374	}
375
376	public Element getLevels() {
377	return getOrCreateElementByTagName(LEVELS_ELEMENT, null, null);
378	}
379
380	public Element getMaintainer() {
381	Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_MAINTAINER_ELEMENT, null, null);
382	element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_MAINTAINER_STR);
383	element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
384	return element;
385	}
386
387	/** Retrieve or create the indexes Element. Note that this method behaves differently from the other getBlah methods, in that it also has to keep in mind that indexes come in two flavours, MG and MGPP. */
388	public Element getMGIndexes() {
389	return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, FALSE_STR);
390	}
391
392	public Element getMGPPIndexes() {
393	return getOrCreateElementByTagName(INDEXES_ELEMENT, MGPP_ATTRIBUTE, TRUE_STR);
394	}
395
396	public Element getPublic() {
397	Element element = getOrCreateElementByTagName(COLLECTIONMETADATA_PUBLIC_ELEMENT, null, null);
398	element.setAttribute(NAME_ATTRIBUTE, COLLECTIONMETADATA_PUBLIC_STR);
399	element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
400	return element;
401	}
402
403	/** Retrieve or create the searchtype element. */
404	public Element getSearchType() {
405	///ystem.err.println("Get or create element by tag name: " + name);
406	Element document_element = document.getDocumentElement();
407	NodeList elements = document_element.getElementsByTagName(SEARCHTYPE_ELEMENT);
408	int elements_length = elements.getLength();
409	if(elements_length > 0) {
410	document_element = null;
411	return (Element) elements.item(0);
412	}
413	// Create the element
414	Element element = document.createElement(SEARCHTYPE_ELEMENT);
415	Node target_node = findInsertionPoint(element);
416	if(target_node != null) {
417	document_element.insertBefore(element, target_node);
418	}
419	else {
420	document_element.appendChild(element);
421	}
422	document_element = null;
423	// Append a default search type node - form
424	Element a_searchtype_element = CollectionDesignManager.collect_config.document.createElement(CollectionConfiguration.CONTENT_ELEMENT);
425	a_searchtype_element.setAttribute(CollectionConfiguration.NAME_ATTRIBUTE, SearchTypeManager.SEARCH_TYPES[0]);
426	element.appendChild(a_searchtype_element);
427	return element;
428	}
429
430	/** Retrieve or create the subindexes Element. */
431	public Element getSubIndexes() {
432	return getOrCreateElementByTagName(SUBCOLLECTION_INDEXES_ELEMENT, null, null);
433	}
434
435	/** Retrieve or create the supercollections Element. */
436	public Element getSuperCollection() {
437	return getOrCreateElementByTagName(SUPERCOLLECTION_ELEMENT, null, null);
438	}
439
440	public void save() {
441	save(false);
442	}
443
444	public void save(boolean force_xml) {
445	if(collect_config_file.exists()) {
446	File original_file = new File(collect_config_file.getParentFile(), COLLECT_CFG);
447	File backup_file = new File(collect_config_file.getParentFile(), "collect.bak");
448	if(backup_file.exists()) {
449	backup_file.delete();
450	}
451	if(!original_file.renameTo(backup_file)) {
452	Gatherer.println("Can't rename collect.cfg");
453	}
454	}
455	if(force_xml \|\| collect_config_file.getName().equals(COLLECTCONFIGURATION_XML)) {
456	///ystem.err.println("Writing XML");
457	Utility.export(document, new File(collect_config_file.getParentFile(), COLLECTCONFIGURATION_XML));
458	}
459	else {
460	///ystem.err.println("Writing text");
461	try {
462	FileWriter file_writer = new FileWriter(collect_config_file, false);
463	BufferedWriter buffered_writer = new BufferedWriter(file_writer);
464	// In order to write out an old style collect.cfg we have to traverse the model and do several 'cute' tricks to ensure the collect.cfg is valid (for instance while every metadata element has a language attribute, only second or subsequent metadata, for a certain name, needs a language argument - hence the known metadata array. Note that within GLI the language will always be shown, but it doesn't crash and burn like G2 does, te-he).
465	known_metadata = new ArrayList();
466	Element collect_config_element = document.getDocumentElement();
467	NodeList command_elements = collect_config_element.getChildNodes();
468	boolean just_wrote_newline = false; // Prevent two or more newlines in a row
469	for(int i = 0; i < command_elements.getLength(); i++) {
470	Node command_node = command_elements.item(i);
471	if(command_node instanceof Element) {
472	Element command_element = (Element) command_node;
473	// The only thing left are NewLine elements
474	if(command_element.getNodeName().equals(NEWLINE_ELEMENT) && !just_wrote_newline) {
475	buffered_writer.newLine();
476	just_wrote_newline = true;
477	}
478	// Anything else we write to file, but only if it has been assigned, the exception being the Indexes element which just get commented if unassigned (a side effect of MG && MGPP compatibility)
479	else if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR) \|\| command_element.getNodeName().equals(INDEXES_ELEMENT) \|\| command_element.getNodeName().equals(INDEX_DEFAULT_ELEMENT) \|\| command_element.getNodeName().equals(LEVELS_ELEMENT)){
480	String command = toString(command_element, false);
481	if(command != null && command.length() > 0) {
482	write(buffered_writer, command);
483	buffered_writer.newLine();
484	just_wrote_newline = false;
485	}
486	}
487	}
488	}
489	buffered_writer.close();
490	known_metadata = null;
491	}
492	catch (Exception exception) {
493	Gatherer.println("Error in CollectionConfiguration.save(boolean): " + exception);
494	Gatherer.printStackTrace(exception);
495	}
496	}
497	}
498
499	/* ********************** Private Methods ************************* */
500
501	private String classifyToString(Element command_element, boolean show_extracted_namespace) {
502	StringBuffer text = new StringBuffer(CLASSIFY_STR);
503	text.append(TAB_CHARACTER);
504	text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
505	text.append(SPACE_CHARACTER);
506	NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
507	int option_elements_length = option_elements.getLength();
508	for(int j = 0; j < option_elements_length; j++) {
509	Element option_element = (Element) option_elements.item(j);
510	if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
511	text.append(StaticStrings.MINUS_CHARACTER);
512	text.append(option_element.getAttribute(NAME_ATTRIBUTE));
513	String value_str = MSMUtils.getValue(option_element);
514	if(value_str.length() > 0) {
515	// If the value happens to be the identifier of an extracted metadata element, then remove the namespace.
516	if(!show_extracted_namespace && value_str.startsWith(EXTRACTED_PREFIX)) {
517	value_str = value_str.substring(EXTRACTED_PREFIX.length());
518	}
519	text.append(SPACE_CHARACTER);
520	if(value_str.indexOf(SPACE_CHARACTER) == -1) {
521	text.append(value_str);
522	}
523	else {
524	text.append(QUOTE_CHARACTER);
525	text.append(value_str);
526	text.append(QUOTE_CHARACTER);
527	}
528	}
529	value_str = null;
530	if(j < option_elements_length - 1) {
531	text.append(SPACE_CHARACTER);
532	}
533	}
534	option_element = null;
535	}
536	option_elements = null;
537	return text.toString();
538	}
539
540	private String formatToString(Element command_element, boolean show_extracted_namespace) {
541	StringBuffer text = new StringBuffer(FORMAT_STR);
542	text.append(SPACE_CHARACTER);
543	text.append(command_element.getAttribute(NAME_ATTRIBUTE));
544	text.append(SPACE_CHARACTER);
545	String value_str = command_element.getAttribute(VALUE_ATTRIBUTE);
546	if(value_str.length() != 0) {
547	text.append(value_str);
548	}
549	else {
550	value_str = MSMUtils.getValue(command_element);
551	// Remove any references to a namespace for extracted metadata
552	if(!show_extracted_namespace) {
553	value_str.replaceAll(EXTRACTED_PREFIX, "");
554	}
555	text.append(QUOTE_CHARACTER);
556	text.append(value_str);
557	text.append(QUOTE_CHARACTER);
558	}
559	value_str = null;
560	return text.toString();
561	}
562
563	/** Retrieve or create the indexes Element. */
564	private Element getOrCreateElementByTagName(String name, String conditional_attribute, String required_value) {
565	///ystem.err.println("Get or create element by tag name: " + name);
566	Element document_element = document.getDocumentElement();
567	NodeList elements = document_element.getElementsByTagName(name);
568	int elements_length = elements.getLength();
569	if(elements_length > 0) {
570	if(conditional_attribute == null) {
571	document_element = null;
572	return (Element) elements.item(0);
573	}
574	else {
575	for(int i = 0; i < elements_length; i++) {
576	Element element = (Element) elements.item(i);
577	if(element.getAttribute(conditional_attribute).equals(required_value)) {
578	document_element = null;
579	return element;
580	}
581	element = null;
582	}
583	}
584	}
585	// Create the element
586	Element element = document.createElement(name);
587	// If there was a property set it
588	if(conditional_attribute != null) {
589	element.setAttribute(conditional_attribute, required_value);
590	}
591	Node target_node = findInsertionPoint(element);
592	if(target_node != null) {
593	document_element.insertBefore(element, target_node);
594	}
595	else {
596	document_element.appendChild(element);
597	}
598	document_element = null;
599	return element;
600	}
601
602	private String indexesToString(Element command_element, boolean show_extracted_namespace) {
603	boolean comment_only = false;
604	StringBuffer text = new StringBuffer("");
605	if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
606	text.append("#");
607	comment_only = true;
608	}
609	text.append(INDEX_STR);
610	text.append(TAB_CHARACTER);
611	if(!comment_only) {
612	text.append(TAB_CHARACTER);
613	}
614	NodeList index_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
615	// For each index, write its level, a colon, then concatenate its child content elements into a single comma separated list
616	int index_elements_length = index_elements.getLength();
617	for(int j = 0; j < index_elements_length; j++) {
618	Element index_element = (Element) index_elements.item(j);
619	String level_str = index_element.getAttribute(LEVEL_ATTRIBUTE);
620	if(level_str.length() > 0) {
621	text.append(level_str);
622	text.append(StaticStrings.COLON_CHARACTER);
623	}
624	NodeList content_elements = index_element.getElementsByTagName(CONTENT_ELEMENT);
625	int content_elements_length = content_elements.getLength();
626	// Don't output anything if no indexes are set
627	if(content_elements_length == 0) {
628	return null;
629	}
630	for(int k = 0; k < content_elements_length; k++) {
631	Element content_element = (Element) content_elements.item(k);
632	String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
633	if(!show_extracted_namespace && name_str.startsWith(EXTRACTED_PREFIX)) {
634	name_str = name_str.substring(EXTRACTED_PREFIX.length());
635	}
636	text.append(name_str);
637	name_str = null;
638	if(k < content_elements_length - 1) {
639	text.append(StaticStrings.COMMA_CHARACTER);
640	}
641	content_element = null;
642	}
643	if(j < index_elements_length - 1) {
644	text.append(SPACE_CHARACTER);
645	}
646	content_elements = null;
647	index_element = null;
648	}
649	index_elements = null;
650	return text.toString();
651	}
652
653	private String indexDefaultToString(Element command_element, boolean show_extracted_namespace) {
654	StringBuffer text = new StringBuffer("");
655	if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(FALSE_STR)) {
656	text.append("#");
657	}
658	text.append(INDEX_DEFAULT_STR);
659	text.append(TAB_CHARACTER);
660	text.append(command_element.getAttribute(LEVEL_ATTRIBUTE));
661	text.append(StaticStrings.COLON_CHARACTER);
662	NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
663	int content_elements_length = content_elements.getLength();
664	for(int j = 0; j < content_elements_length; j++) {
665	Element content_element = (Element) content_elements.item(j);
666	String name_str = content_element.getAttribute(NAME_ATTRIBUTE);
667	if(!show_extracted_namespace && name_str.startsWith(EXTRACTED_PREFIX)) {
668	name_str = name_str.substring(EXTRACTED_PREFIX.length());
669	}
670	text.append(name_str);
671	name_str = null;
672	if(j < content_elements_length - 1) {
673	text.append(StaticStrings.COMMA_CHARACTER);
674	}
675	content_element = null;
676	}
677	content_elements = null;
678	return text.toString();
679	}
680
681	private String languagesToString(Element command_element) {
682	StringBuffer text = new StringBuffer(LANGUAGES_STR);
683	text.append(TAB_CHARACTER);
684	// Retrieve all the languages and write them out in a space separated list
685	NodeList language_elements = command_element.getElementsByTagName(LANGUAGE_ELEMENT);
686	int language_elements_length = language_elements.getLength();
687	if(language_elements_length == 0) {
688	return null;
689	}
690	for(int j = 0; j < language_elements_length; j++) {
691	Element language_element = (Element) language_elements.item(j);
692	text.append(language_element.getAttribute(NAME_ATTRIBUTE));
693	if(j < language_elements_length - 1) {
694	text.append(SPACE_CHARACTER);
695	}
696	}
697	return text.toString();
698	}
699
700	private String languageDefaultToString(Element command_element) {
701	StringBuffer text = new StringBuffer(LANGUAGE_DEFAULT_STR);
702	text.append(TAB_CHARACTER);
703	text.append(command_element.getAttribute(NAME_ATTRIBUTE));
704	return text.toString();
705	}
706
707	private String levelsToString(Element command_element) {
708	StringBuffer text = new StringBuffer("");
709	if(!command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
710	text.append("#");
711	}
712	text.append(LEVELS_STR);
713	text.append(TAB_CHARACTER);
714	text.append(TAB_CHARACTER);
715	NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
716	int content_elements_length = content_elements.getLength();
717	// Don't output anything if no levels are set.
718	if(content_elements_length == 0) {
719	return null;
720	}
721	for(int i = 0; i < content_elements_length; i++) {
722	Element content_element = (Element) content_elements.item(i);
723	text.append(content_element.getAttribute(NAME_ATTRIBUTE));
724	text.append(SPACE_CHARACTER);
725	}
726	return text.substring(0, text.length() - 1);
727	}
728
729	static public String metadataToString(Element command_element) {
730	// If there is no value attribute, then we don't write anything
731	String value_str = MSMUtils.getValue(command_element);
732	if(value_str == null \|\| value_str.length() == 0) {
733	return "";
734	}
735	else {
736	StringBuffer text = new StringBuffer("");
737	String name_str = command_element.getAttribute(NAME_ATTRIBUTE);
738	// If the name is one of the special four, we don't write the collectionmeta first. Note the maintainer collectionmeta is singled out for 'prittying' reasons.
739	if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
740	text.append(name_str);
741	text.append(TAB_CHARACTER);
742	}
743	else if(name_str.equals(COLLECTIONMETADATA_BETA_STR) \|\| name_str.equals(COLLECTIONMETADATA_CREATOR_STR) \|\| name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
744	text.append(name_str);
745	text.append(TAB_CHARACTER);
746	text.append(TAB_CHARACTER);
747	}
748	else {
749	text.append(COLLECTIONMETADATA_STR);
750	text.append(TAB_CHARACTER);
751	text.append(name_str);
752	text.append(SPACE_CHARACTER);
753	String language_str = command_element.getAttribute(LANGUAGE_ATTRIBUTE);
754	// If this is element is in english, and it is the first one found, we don't need to write the language argument.
755	if(!language_str.equals(ENGLISH_LANGUAGE_STR) \|\| known_metadata == null \|\| known_metadata.contains(name_str)) {
756	text.append(LBRACKET_CHARACTER);
757	text.append(LANGUAGE_ARGUMENT);
758	text.append(language_str);
759	text.append(RBRACKET_CHARACTER);
760	text.append(SPACE_CHARACTER);
761	}
762	if(known_metadata != null) {
763	known_metadata.add(name_str);
764	}
765	language_str = null;
766	}
767	name_str = null;
768	// We don't wrap the email addresses in quotes, nor any string without spaces
769	if(value_str.indexOf(SPACE_CHARACTER) == -1) {
770	text.append(value_str);
771	}
772	else {
773	text.append(QUOTE_CHARACTER);
774	text.append(value_str);
775	text.append(QUOTE_CHARACTER);
776	}
777	value_str = null;
778	return text.toString();
779	}
780	}
781
782	/** Parse a collect.cfg into a DOM model representation. */
783	private void parse(File collect_config_file) {
784	try {
785	Element collect_cfg_element = document.getDocumentElement();
786	// Read in the file command at a time.
787	FileReader in_reader = new FileReader(collect_config_file);
788	BufferedReader in = new BufferedReader(in_reader);
789	String command_str = null;
790	while((command_str = in.readLine()) != null) {
791	Element command_element = null;
792	// A command may be broken over several lines.
793	command_str = command_str.trim();
794	boolean eof = false;
795	while(!eof && command_str.endsWith(NEWLINE_CHARACTER)) {
796	String next_line = in.readLine();
797	if(next_line != null) {
798	next_line = next_line.trim();
799	if(next_line.length() > 0) {
800	// Remove the new line character
801	command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
802	// And append the next line, which due to the test above must be non-zero length
803	command_str = command_str + next_line;
804	}
805	next_line = null;
806	}
807	// If we've reached the end of the file theres nothing more we can do
808	else {
809	eof = true;
810	}
811	}
812	// If there is still a new line character, then we remove it and hope for the best
813	if(command_str.endsWith(NEWLINE_CHARACTER)) {
814	command_str = command_str.substring(0, command_str.lastIndexOf(NEWLINE_CHARACTER));
815	}
816	// Now we've either got a command to parse...
817	if(command_str.length() != 0) {
818	// Start trying to figure out what it is
819	StringTokenizer tokenizer = new StringTokenizer(command_str);
820	String command_type = tokenizer.nextToken().toLowerCase();
821	tokenizer = null;
822	// Why can't you switch on strings eh? We pass it to the various subparsers who each have a bash at parsing the command. If none can parse the command, an unknown element is created
823	if(command_element == null && command_type.equals(CLASSIFY_STR)) {
824	command_element = parseClassify(command_str);
825	}
826	if(command_element == null && command_type.equals(FORMAT_STR)) {
827	command_element = parseFormat(command_str);
828	}
829	if(command_element == null && (command_type.equals(INDEX_STR) \|\| command_type.equals(COMMENTED_INDEXES_STR))) {
830	command_element = parseIndex(command_str);
831	}
832	if(command_element == null && (command_type.equals(INDEX_DEFAULT_STR) \|\| command_type.equals(COMMENTED_INDEX_DEFAULT_STR))) {
833	command_element = parseIndexDefault(command_str);
834	}
835	if(command_element == null && command_type.equals(LANGUAGES_STR)) {
836	command_element = parseLanguage(command_str);
837	}
838	if(command_element == null && command_type.equals(LANGUAGE_DEFAULT_STR)) {
839	command_element = parseLanguageDefault(command_str);
840	}
841	if(command_element == null && (command_type.equals(LEVELS_STR) \|\| command_type.equals(COMMENTED_LEVELS_STR))) {
842	command_element = parseLevels(command_str);
843	}
844	if(command_element == null && command_type.equals(COLLECTIONMETADATA_STR)) {
845	command_element = parseMetadata(command_str);
846	}
847	if(command_element == null && (command_type.equals(COLLECTIONMETADATA_BETA_STR) \|\| command_type.equals(COLLECTIONMETADATA_PUBLIC_STR) \|\| command_type.equals(COLLECTIONMETADATA_CREATOR_STR) \|\| command_type.equals(COLLECTIONMETADATA_MAINTAINER_STR))) {
848	command_element = parseMetadataSpecial(command_str);
849	}
850	if(command_element == null && command_type.equals(PLUGIN_STR)) {
851	command_element = parsePlugIn(command_str);
852	}
853	if(command_element == null && command_type.equals(SEARCHTYPE_STR)) {
854	command_element = parseSearchType(command_str);
855	}
856	if(command_element == null && command_type.equals(SUBCOLLECTION_STR)) {
857	command_element = parseSubCollection(command_str);
858	}
859	if(command_element == null && command_type.equals(SUBCOLLECTION_DEFAULT_INDEX_STR)) {
860	command_element = parseSubCollectionDefaultIndex(command_str);
861	}
862	if(command_element == null && command_type.equals(SUBCOLLECTION_INDEX_STR)) {
863	command_element = parseSubCollectionIndex(command_str);
864	}
865	if(command_element == null && (command_type.equals(SUPERCOLLECTION_STR) \|\| command_type.equals(CCS_STR))) {
866	command_element = parseSuperCollection(command_str);
867	}
868	// Doesn't match any known type
869	command_type = null;
870	if(command_element == null) {
871	// No-one knows what to do with this command, so we create an Unknown command element
872	command_element = document.createElement(UNKNOWN_ELEMENT);
873	MSMUtils.setValue(command_element, command_str);
874	}
875	}
876	// Or an empty line to remember for later
877	else {
878	command_element = document.createElement(NEWLINE_ELEMENT);
879	}
880	// Now command element shouldn't be null so we append it to the collection config DOM
881	collect_cfg_element.appendChild(command_element);
882	}
883	}
884	catch(Exception exception) {
885	Gatherer.println("Error in CollectionConfiguration.parse(java.io.File): " + exception);
886	Gatherer.printStackTrace(exception);
887	}
888	}
889
890	private Element parseClassify(String command_str) {
891	Element command_element = null;
892	try {
893	CommandTokenizer tokenizer = new CommandTokenizer(command_str);
894	// Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
895	if(tokenizer.countTokens() >= 4) {
896	command_element = document.createElement(CLASSIFY_ELEMENT);
897	// First token is classify
898	tokenizer.nextToken();
899	// The next token is the classifier type
900	command_element.setAttribute(TYPE_ATTRIBUTE, tokenizer.nextToken());
901	// Now we parse out the remaining arguments into a hashmapping from name to value
902	HashMap arguments = parseArguments(tokenizer);
903	// Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
904	Iterator names = arguments.keySet().iterator();
905	while(names.hasNext()) {
906	String name = (String) names.next();
907	String value = (String) arguments.get(name); // Can be null
908	// The metadata argument gets added as the content attribute
909	if(name.equals(METADATA_ARGUMENT) && value != null) {
910	// The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace.
911	if(value.indexOf(MSMUtils.NS_SEP) == -1) {
912	value = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + value;
913	}
914	//command_element.setAttribute(CONTENT_ATTRIBUTE, value);
915	}
916	// Everything else is an Option Element
917	Element option_element = document.createElement(OPTION_ELEMENT);
918	option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
919	if(value != null) {
920	MSMUtils.setValue(option_element, value);
921	}
922	option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
923	option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
924	command_element.appendChild(option_element);
925	option_element = null;
926	name = null;
927	value = null;
928	}
929	names = null;
930	arguments = null;
931	}
932	tokenizer = null;
933	}
934	catch(Exception error) {
935	}
936	return command_element;
937	}
938
939	private Element parseFormat(String command_str) {
940	Element command_element = null;
941	try {
942	CommandTokenizer tokenizer = new CommandTokenizer(command_str);
943	if(tokenizer.countTokens() >= 3) {
944	command_element = document.createElement(FORMAT_ELEMENT);
945	// First token is format
946	tokenizer.nextToken();
947	command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
948	String format_value = tokenizer.nextToken();
949	// If the value is true or false we add it as an attribute
950	if(format_value.equalsIgnoreCase(TRUE_STR) \|\| format_value.equalsIgnoreCase(FALSE_STR)) {
951	command_element.setAttribute(VALUE_ATTRIBUTE, format_value.toLowerCase());
952	}
953	// Otherwise it gets added as a text node
954	else {
955	// Strip any speech marks
956	if(format_value.startsWith(QUOTE_CHARACTER) && format_value.endsWith(QUOTE_CHARACTER)) {
957	format_value = format_value.substring(1, format_value.length() - 1);
958	}
959	MSMUtils.setValue(command_element, format_value);
960	}
961	format_value = null;
962	}
963	tokenizer = null;
964	}
965	catch(Exception exception) {
966	}
967	return command_element;
968	}
969
970	private Element parseIndex(String command_str) {
971	Element command_element = null;
972	try {
973	StringTokenizer tokenizer = new StringTokenizer(command_str);
974	String command = tokenizer.nextToken();
975	if(tokenizer.hasMoreTokens()) {
976	command_element = document.createElement(INDEXES_ELEMENT);
977	}
978	command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(INDEX_STR) ? TRUE_STR : FALSE_STR));
979	command = null;
980	while(tokenizer.hasMoreTokens()) {
981	Element index_element = document.createElement(INDEX_ELEMENT);
982	String index_str = tokenizer.nextToken();
983	// There are two types of index we have to consider. Old G2.38 and earlier use level:source tuplets while G2.39+ have just a single, non-comma separated list where order is important.
984	boolean old_index;
985	if(index_str.indexOf(COLON_CHARACTER) != -1) {
986	old_index = true;
987	index_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
988	index_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
989	command_element.setAttribute(MGPP_ATTRIBUTE, FALSE_STR);
990	}
991	else {
992	command_element.setAttribute(MGPP_ATTRIBUTE, TRUE_STR);
993	old_index = false;
994	}
995	StringTokenizer content_tokenizer = new StringTokenizer(index_str, StaticStrings.COMMA_CHARACTER);
996	while(content_tokenizer.hasMoreTokens()) {
997	Element content_element = document.createElement(CONTENT_ELEMENT);
998	String content_str = content_tokenizer.nextToken();
999	// Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1000	if(content_str.indexOf(MSMUtils.NS_SEP) == -1) {
1001	if(content_str.equals(StaticStrings.TEXT_STR) \|\| (!old_index && content_str.equals(StaticStrings.ALLFIELDS_STR))) {
1002	// Our special strings are OK.
1003	}
1004	else {
1005	content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str;
1006	}
1007	}
1008	content_element.setAttribute(NAME_ATTRIBUTE, content_str);
1009	index_element.appendChild(content_element);
1010	content_element = null;
1011	}
1012	content_tokenizer = null;
1013	index_str = null;
1014	command_element.appendChild(index_element);
1015	index_element = null;
1016	}
1017	tokenizer = null;
1018	}
1019	catch (Exception exception) {
1020	exception.printStackTrace();
1021	}
1022	return command_element;
1023	}
1024
1025	private Element parseIndexDefault(String command_str) {
1026	Element command_element = null;
1027	try {
1028	StringTokenizer tokenizer = new StringTokenizer(command_str);
1029	if(tokenizer.countTokens() >= 2) {
1030	command_element = document.createElement(INDEX_DEFAULT_ELEMENT);
1031	command_element.setAttribute(ASSIGNED_ATTRIBUTE, ((tokenizer.nextToken()).equals(INDEX_DEFAULT_STR) ? TRUE_STR : FALSE_STR));
1032	String index_str = tokenizer.nextToken();
1033	command_element.setAttribute(LEVEL_ATTRIBUTE, index_str.substring(0, index_str.indexOf(StaticStrings.COLON_CHARACTER)));
1034	String content_str = index_str.substring(index_str.indexOf(StaticStrings.COLON_CHARACTER) + 1);
1035	StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1036	while(content_tokenizer.hasMoreTokens()) {
1037	Element content_element = document.createElement(CONTENT_ELEMENT);
1038	content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1039	command_element.appendChild(content_element);
1040	content_element = null;
1041	}
1042	content_tokenizer = null;
1043	content_str = null;
1044	content_str = null;
1045	index_str = null;
1046	}
1047	tokenizer = null;
1048	}
1049	catch (Exception exception) {
1050	}
1051	return command_element;
1052	}
1053
1054	private Element parseLanguage(String command_str) {
1055	Element command_element = null;
1056	try {
1057	StringTokenizer tokenizer = new StringTokenizer(command_str);
1058	tokenizer.nextToken();
1059	if(tokenizer.hasMoreTokens()) {
1060	command_element = document.createElement(LANGUAGES_ELEMENT);
1061	while(tokenizer.hasMoreTokens()) {
1062	Element language_element = document.createElement(LANGUAGE_ELEMENT);
1063	language_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1064	command_element.appendChild(language_element);
1065	language_element = null;
1066	}
1067	}
1068	tokenizer = null;
1069	}
1070	catch (Exception exception) {
1071	}
1072	return command_element;
1073	}
1074
1075	private Element parseLanguageDefault(String command_str) {
1076	Element command_element = null;
1077	try {
1078	StringTokenizer tokenizer = new StringTokenizer(command_str);
1079	if(tokenizer.countTokens() >= 2) {
1080	command_element = document.createElement(LANGUAGE_DEFAULT_ELEMENT);
1081	tokenizer.nextToken();
1082	String default_language_str = tokenizer.nextToken();
1083	command_element.setAttribute(NAME_ATTRIBUTE, default_language_str);
1084	default_language_str = null;
1085	}
1086	tokenizer = null;
1087	}
1088	catch (Exception exception) {
1089	}
1090	return command_element;
1091	}
1092
1093	private Element parseLevels(String command_str) {
1094	Element command_element = null;
1095	try {
1096	StringTokenizer tokenizer = new StringTokenizer(command_str);
1097	// First token is command type (levels)
1098	String command = tokenizer.nextToken();
1099	if(tokenizer.hasMoreTokens()) {
1100	command_element = document.createElement(LEVELS_ELEMENT);
1101	command_element.setAttribute(ASSIGNED_ATTRIBUTE, (command.equals(LEVELS_STR) ? TRUE_STR : FALSE_STR));
1102	while(tokenizer.hasMoreTokens()) {
1103	Element level_element = document.createElement(CONTENT_ELEMENT);
1104	level_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1105	command_element.appendChild(level_element);
1106	level_element = null;
1107	}
1108	}
1109	command = null;
1110	}
1111	catch(Exception exception) {
1112	}
1113	return command_element;
1114	}
1115
1116	private Element parseMetadata(String command_str) {
1117	Element command_element = null;
1118	try {
1119	CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1120	if(tokenizer.countTokens() >= 3) {
1121	command_element = document.createElement(COLLECTIONMETADATA_ELEMENT);
1122	// First token is command type
1123	tokenizer.nextToken();
1124	String name_str = tokenizer.nextToken();
1125	String value_str = tokenizer.nextToken();
1126	String language_str = "en"; // By default
1127	// Check if the value string is actually a language string
1128	if(value_str.startsWith(LBRACKET_CHARACTER) && value_str.endsWith(RBRACKET_CHARACTER)) {
1129	language_str = value_str.substring(value_str.indexOf(LANGUAGE_ARGUMENT) + 2, value_str.length() - 1);
1130	value_str = tokenizer.nextToken();
1131	}
1132	// Remove any speech marks from value
1133	if(value_str.startsWith(QUOTE_CHARACTER) && value_str.endsWith(QUOTE_CHARACTER)) {
1134	value_str = value_str.substring(1, value_str.length() - 1);
1135	}
1136	if(value_str != null) {
1137	command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1138	command_element.setAttribute(LANGUAGE_ATTRIBUTE, language_str);
1139	MSMUtils.setValue(command_element, Utility.encodeXML(value_str));
1140	}
1141	else {
1142	command_element = null;
1143	}
1144	language_str = null;
1145	value_str = null;
1146	name_str = null;
1147	}
1148	tokenizer = null;
1149	}
1150	catch (Exception exception) {
1151	}
1152	return command_element;
1153	}
1154
1155	private Element parseMetadataSpecial(String command_str) {
1156	Element command_element = null;
1157	try {
1158	StringTokenizer tokenizer = new StringTokenizer(command_str);
1159	if(tokenizer.countTokens() >= 2) {
1160	String name_str = tokenizer.nextToken();
1161	String value_str = tokenizer.nextToken();
1162	if(name_str.equals(COLLECTIONMETADATA_BETA_STR)) {
1163	command_element = document.createElement(COLLECTIONMETADATA_BETA_ELEMENT);
1164	}
1165	else if(name_str.equals(COLLECTIONMETADATA_CREATOR_STR)) {
1166	command_element = document.createElement(COLLECTIONMETADATA_CREATOR_ELEMENT);
1167	}
1168	else if(name_str.equals(COLLECTIONMETADATA_MAINTAINER_STR)) {
1169	command_element = document.createElement(COLLECTIONMETADATA_MAINTAINER_ELEMENT);
1170	}
1171	else if(name_str.equals(COLLECTIONMETADATA_PUBLIC_STR)) {
1172	command_element = document.createElement(COLLECTIONMETADATA_PUBLIC_ELEMENT);
1173	}
1174	if(command_element != null) {
1175	command_element.setAttribute(NAME_ATTRIBUTE, name_str);
1176	command_element.setAttribute(LANGUAGE_ATTRIBUTE, ENGLISH_LANGUAGE_STR);
1177	command_element.setAttribute(SPECIAL_ATTRIBUTE, TRUE_STR);
1178	MSMUtils.setValue(command_element, value_str);
1179	}
1180	value_str = null;
1181	name_str = null;
1182	}
1183	tokenizer = null;
1184	}
1185	catch (Exception exception) {
1186	}
1187	return command_element;
1188	}
1189
1190	private Element parsePlugIn(String command_str) {
1191	Element command_element = null;
1192	boolean use_metadata_files = false;
1193	try {
1194	CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1195	// Check the token count. The token count from a command tokenizer isn't guarenteed to be correct, but it does give the maximum number of available tokens according to the underlying StringTokenizer (some of which may actually be append together by the CommandTokenizer as being a single argument).
1196	if(tokenizer.countTokens() >= 2) {
1197	command_element = document.createElement(PLUGIN_ELEMENT);
1198	// First token is plugin
1199	tokenizer.nextToken();
1200	// The next token is the type
1201	String type = tokenizer.nextToken();
1202	command_element.setAttribute(TYPE_ATTRIBUTE, type);
1203	// Now we parse out the remaining arguments into a hashmapping from name to value
1204	HashMap arguments = parseArguments(tokenizer);
1205	// Assign the arguments as Option elements, but watch out for the metadata argument as we treat that differently
1206	Iterator names = arguments.keySet().iterator();
1207	while(names.hasNext()) {
1208	String name = (String) names.next();
1209	String value = (String) arguments.get(name); // Can be null
1210	Element option_element = document.createElement(OPTION_ELEMENT);
1211	if(name.substring(1).equals(USE_METADATA_FILES_ARGUMENT)) {
1212	use_metadata_files = true;
1213	}
1214	option_element.setAttribute(NAME_ATTRIBUTE, name.substring(1));
1215	option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1216	option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR); // All arguments are considered to be custom until matched against base plugins arguments
1217	if(value != null) {
1218	if(name.equals(METADATA_ARGUMENT)) {
1219	// The metadata argument must be the fully qualified name of a metadata element, so if it doesn't yet have a namespace, append the extracted metadata namespace.
1220	if(value.indexOf(MSMUtils.NS_SEP) == -1) {
1221	value = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + value;
1222	}
1223	}
1224	MSMUtils.setValue(option_element, value);
1225	}
1226	command_element.appendChild(option_element);
1227	option_element = null;
1228	name = null;
1229	value = null;
1230	}
1231	// Finally if the type happened to be a RecPlug we ensure that the use metadata files argument is set
1232	if(type.equals(RECPLUG_STR) && !use_metadata_files) {
1233	Element option_element = document.createElement(OPTION_ELEMENT);
1234	option_element.setAttribute(NAME_ATTRIBUTE, USE_METADATA_FILES_ARGUMENT);
1235	option_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1236	option_element.setAttribute(CUSTOM_ATTRIBUTE, TRUE_STR);
1237	command_element.appendChild(option_element);
1238	option_element = null;
1239	}
1240	type = null;
1241	names = null;
1242	arguments = null;
1243	}
1244	tokenizer = null;
1245	}
1246	catch(Exception exception) {
1247	}
1248	return command_element;
1249	}
1250
1251	private Element parseSearchType(String command_str) {
1252	Element command_element = null;
1253	try {
1254	StringTokenizer tokenizer = new StringTokenizer(command_str);
1255	// First token is command type (levels)
1256	tokenizer.nextToken();
1257	if(tokenizer.hasMoreTokens()) {
1258	command_element = document.createElement(SEARCHTYPE_ELEMENT);
1259	command_element.setAttribute(ASSIGNED_ATTRIBUTE, TRUE_STR);
1260	while(tokenizer.hasMoreTokens()) {
1261	Element search_element = document.createElement(CONTENT_ELEMENT);
1262	search_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1263	command_element.appendChild(search_element);
1264	search_element = null;
1265	}
1266	}
1267	}
1268	catch(Exception exception) {
1269	}
1270	return command_element;
1271	}
1272
1273	private Element parseSubCollection(String command_str) {
1274	Element command_element = null;
1275	try {
1276	CommandTokenizer tokenizer = new CommandTokenizer(command_str);
1277	if(tokenizer.countTokens() >= 3) {
1278	command_element = document.createElement(SUBCOLLECTION_ELEMENT);
1279	// First token is command type
1280	tokenizer.nextToken();
1281	// Then subcollection identifier
1282	command_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1283	// Then finally the pattern used to build the subcollection partition
1284	String full_pattern_str = tokenizer.nextToken();
1285	// To make life easier I'm going to parse this up now.
1286	boolean exclusion = (full_pattern_str.substring(1, 2).equals(EXCLAIMATION_CHARACTER));
1287	// Set inclusion/exclusion flag, remove any exclaimation mark and the speech marks
1288	if(exclusion) {
1289	full_pattern_str = full_pattern_str.substring(2, full_pattern_str.length() - 1);
1290	command_element.setAttribute(TYPE_ATTRIBUTE, EXCLUDE_STR);
1291	}
1292	else {
1293	full_pattern_str = full_pattern_str.substring(1, full_pattern_str.length() - 1);
1294	command_element.setAttribute(TYPE_ATTRIBUTE, INCLUDE_STR);
1295	}
1296	StringTokenizer pattern_tokenizer = new StringTokenizer(full_pattern_str, SEPARATOR_CHARACTER);
1297	if(pattern_tokenizer.countTokens() >= 2) {
1298	String content_str = pattern_tokenizer.nextToken();
1299	// Since the contents of indexes have to be certain keywords, or metadata elements, if the content isn't a keyword and doesn't yet have a namespace, append the extracted metadata namespace.
1300	if(!content_str.equals(StaticStrings.FILENAME_STR) && content_str.indexOf(MSMUtils.NS_SEP) == -1) {
1301	content_str = Utility.EXTRACTED_METADATA_NAMESPACE + MSMUtils.NS_SEP + content_str;
1302	}
1303	command_element.setAttribute(CONTENT_ATTRIBUTE, content_str);
1304	MSMUtils.setValue(command_element, pattern_tokenizer.nextToken());
1305	if(pattern_tokenizer.hasMoreTokens()) {
1306	command_element.setAttribute(OPTIONS_ATTRIBUTE, pattern_tokenizer.nextToken());
1307	}
1308	}
1309	pattern_tokenizer = null;
1310	}
1311	}
1312	catch(Exception exception) {
1313	exception.printStackTrace();
1314	}
1315	return command_element;
1316	}
1317
1318	private Element parseSubCollectionDefaultIndex(String command_str) {
1319	Element command_element = null;
1320	try {
1321	StringTokenizer tokenizer = new StringTokenizer(command_str);
1322	if(tokenizer.countTokens() == 2) {
1323	command_element = document.createElement(SUBCOLLECTION_DEFAULT_INDEX_ELEMENT);
1324	tokenizer.nextToken();
1325	//command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1326	String content_str = tokenizer.nextToken();
1327	StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1328	while(content_tokenizer.hasMoreTokens()) {
1329	Element content_element = document.createElement(CONTENT_ELEMENT);
1330	content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1331	command_element.appendChild(content_element);
1332	content_element = null;
1333	}
1334	content_tokenizer = null;
1335	content_str = null;
1336	}
1337	tokenizer = null;
1338	}
1339	catch(Exception exception) {
1340	}
1341	return command_element;
1342	}
1343
1344	private Element parseSubCollectionIndex(String command_str) {
1345	Element command_element = null;
1346	try {
1347	StringTokenizer tokenizer = new StringTokenizer(command_str);
1348	tokenizer.nextToken();
1349	if(tokenizer.hasMoreTokens()) {
1350	command_element = document.createElement(SUBCOLLECTION_INDEXES_ELEMENT);
1351	}
1352	while(tokenizer.hasMoreTokens()) {
1353	Element subcollectionindex_element = document.createElement(INDEX_ELEMENT);
1354	//command_element.setAttribute(CONTENT_ATTRIBUTE, tokenizer.nextToken());
1355	String content_str = tokenizer.nextToken();
1356	StringTokenizer content_tokenizer = new StringTokenizer(content_str, StaticStrings.COMMA_CHARACTER);
1357	while(content_tokenizer.hasMoreTokens()) {
1358	Element content_element = document.createElement(CONTENT_ELEMENT);
1359	content_element.setAttribute(NAME_ATTRIBUTE, content_tokenizer.nextToken());
1360	subcollectionindex_element.appendChild(content_element);
1361	content_element = null;
1362	}
1363	content_tokenizer = null;
1364	content_str = null;
1365	command_element.appendChild(subcollectionindex_element);
1366	subcollectionindex_element = null;
1367	}
1368	tokenizer = null;
1369	}
1370	catch (Exception exception) {
1371	}
1372	return command_element;
1373	}
1374
1375	private Element parseSuperCollection(String command_str) {
1376	Element command_element = null;
1377	try {
1378	StringTokenizer tokenizer = new StringTokenizer(command_str);
1379	if(tokenizer.countTokens() >= 3) {
1380	command_element = document.createElement(SUPERCOLLECTION_ELEMENT);
1381	tokenizer.nextToken();
1382	while(tokenizer.hasMoreTokens()) {
1383	Element collection_element = document.createElement(COLLECTION_ELEMENT);
1384	collection_element.setAttribute(NAME_ATTRIBUTE, tokenizer.nextToken());
1385	command_element.appendChild(collection_element);
1386	collection_element = null;
1387	}
1388	}
1389	tokenizer = null;
1390	}
1391	catch(Exception exception) {
1392	}
1393	return command_element;
1394	}
1395
1396	private String pluginToString(Element command_element, boolean show_extracted_namespace) {
1397	StringBuffer text = new StringBuffer();
1398	if(!command_element.getAttribute(SEPARATOR_ATTRIBUTE).equals(TRUE_STR)) {
1399	text.append(PLUGIN_STR);
1400	text.append(TAB_CHARACTER);
1401	text.append(TAB_CHARACTER);
1402	text.append(command_element.getAttribute(TYPE_ATTRIBUTE));
1403	// Retrieve, and output, the arguments
1404	NodeList option_elements = command_element.getElementsByTagName(OPTION_ELEMENT);
1405	int option_elements_length = option_elements.getLength();
1406	if(option_elements_length > 0) {
1407	text.append(SPACE_CHARACTER);
1408	for(int j = 0; j < option_elements_length; j++) {
1409	Element option_element = (Element) option_elements.item(j);
1410	if(option_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1411	text.append(StaticStrings.MINUS_CHARACTER);
1412	text.append(option_element.getAttribute(NAME_ATTRIBUTE));
1413	String value_str = MSMUtils.getValue(option_element);
1414	if(!show_extracted_namespace && value_str.startsWith(EXTRACTED_PREFIX)) {
1415	value_str = value_str.substring(EXTRACTED_PREFIX.length());
1416	}
1417	if(value_str.length() > 0) {
1418	text.append(SPACE_CHARACTER);
1419	if(value_str.indexOf(SPACE_CHARACTER) == -1) {
1420	text.append(value_str);
1421	}
1422	else {
1423	text.append(QUOTE_CHARACTER);
1424	text.append(value_str);
1425	text.append(QUOTE_CHARACTER);
1426	}
1427	}
1428	value_str = null;
1429	if(j < option_elements_length - 1) {
1430	text.append(SPACE_CHARACTER);
1431	}
1432	}
1433	option_element = null;
1434	}
1435	}
1436	option_elements = null;
1437	}
1438	return text.toString();
1439	}
1440
1441	private String searchtypeToString(Element command_element) {
1442	if(command_element.getAttribute(ASSIGNED_ATTRIBUTE).equals(TRUE_STR)) {
1443	StringBuffer text = new StringBuffer(SEARCHTYPE_STR);
1444	text.append(TAB_CHARACTER);
1445	NodeList search_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1446	int search_elements_length = search_elements.getLength();
1447	for(int i = 0; i < search_elements_length; i++) {
1448	Element search_element = (Element) search_elements.item(i);
1449	text.append(search_element.getAttribute(NAME_ATTRIBUTE));
1450	text.append(SPACE_CHARACTER);
1451	}
1452	return text.substring(0, text.length() - 1);
1453	}
1454	else {
1455	return null;
1456	}
1457	}
1458
1459	private String subcollectionToString(Element command_element, boolean show_extracted_namespace) {
1460	StringBuffer text = new StringBuffer(SUBCOLLECTION_STR);
1461	text.append(SPACE_CHARACTER);
1462	text.append(command_element.getAttribute(NAME_ATTRIBUTE));
1463	text.append(SPACE_CHARACTER);
1464	text.append(TAB_CHARACTER);
1465	text.append(QUOTE_CHARACTER);
1466	if(command_element.getAttribute(TYPE_ATTRIBUTE).equals(EXCLUDE_STR)) {
1467	text.append(EXCLAIMATION_CHARACTER);
1468	}
1469	String content_str = command_element.getAttribute(CONTENT_ATTRIBUTE);
1470	if(!show_extracted_namespace && content_str.startsWith(EXTRACTED_PREFIX)) {
1471	content_str = content_str.substring(EXTRACTED_PREFIX.length());
1472	}
1473	text.append(content_str);
1474	content_str = null;
1475	text.append(SEPARATOR_CHARACTER);
1476	text.append(MSMUtils.getValue(command_element));
1477	String options_str = command_element.getAttribute(OPTIONS_ATTRIBUTE);
1478	if(options_str.length() > 0) {
1479	text.append(SEPARATOR_CHARACTER);
1480	text.append(options_str);
1481	}
1482	options_str = null;
1483	text.append(QUOTE_CHARACTER);
1484	return text.toString();
1485	}
1486
1487	private String subcollectionDefaultIndexToString(Element command_element) {
1488	StringBuffer text = new StringBuffer(SUBCOLLECTION_DEFAULT_INDEX_STR);
1489	text.append(TAB_CHARACTER);
1490	NodeList content_elements = command_element.getElementsByTagName(CONTENT_ELEMENT);
1491	int content_elements_length = content_elements.getLength();
1492	for(int j = 0; j < content_elements_length; j++) {
1493	Element content_element = (Element) content_elements.item(j);
1494	text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1495	if(j < content_elements_length - 1) {
1496	text.append(StaticStrings.COMMA_CHARACTER);
1497	}
1498	}
1499	return text.toString();
1500	}
1501
1502	private String subcollectionIndexesToString(Element command_element) {
1503	StringBuffer text = new StringBuffer(SUBCOLLECTION_INDEX_STR);
1504	text.append(TAB_CHARACTER);
1505	// Retrieve all of the subcollection index partitions
1506	NodeList subcollectionindex_elements = command_element.getElementsByTagName(INDEX_ELEMENT);
1507	int subcollectionindex_elements_length = subcollectionindex_elements.getLength();
1508	if(subcollectionindex_elements_length == 0) {
1509	return null;
1510	}
1511	for(int j = 0; j < subcollectionindex_elements_length; j++) {
1512	Element subcollectionindex_element = (Element) subcollectionindex_elements.item(j);
1513	NodeList content_elements = subcollectionindex_element.getElementsByTagName(CONTENT_ELEMENT);
1514	int content_elements_length = content_elements.getLength();
1515	for(int k = 0; k < content_elements_length; k++) {
1516	Element content_element = (Element) content_elements.item(k);
1517	text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1518	if(k < content_elements_length - 1) {
1519	text.append(StaticStrings.COMMA_CHARACTER);
1520	}
1521	}
1522	if(j < subcollectionindex_elements_length - 1) {
1523	text.append(SPACE_CHARACTER);
1524	}
1525	}
1526	return text.toString();
1527	}
1528
1529	private String supercollectionToString(Element command_element) {
1530	NodeList content_elements = command_element.getElementsByTagName(COLLECTION_ELEMENT);
1531	int content_elements_length = content_elements.getLength();
1532	if(content_elements_length > 1) {
1533	StringBuffer text = new StringBuffer(SUPERCOLLECTION_STR);
1534	text.append(TAB_CHARACTER);
1535	for(int j = 0; j < content_elements_length; j++) {
1536	Element content_element = (Element) content_elements.item(j);
1537	text.append(content_element.getAttribute(NAME_ATTRIBUTE));
1538	if(j < content_elements_length - 1) {
1539	text.append(SPACE_CHARACTER);
1540	}
1541	}
1542	return text.toString();
1543	}
1544	return null;
1545	}
1546
1547	private String unknownToString(Element command_element) {
1548	return MSMUtils.getValue(command_element);
1549	}
1550
1551	/** Write the text to the buffer. This is used so we don't have to worry about storing intermediate String values just so we can calaulate length and offset.
1552	* @param writer the BufferedWriter to which the str will be written
1553	* @param str the String to be written
1554	*/
1555	private void write(BufferedWriter writer, String str)
1556	throws IOException {
1557	writer.write(str, 0, str.length());
1558	}
1559	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: