Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2Retrieve.java@ 6490

Last change on this file since 6490 was 6275, checked in by kjdon, 20 years ago
now checks for empty metadata before adding it
Property svn:keywords set to `Author Date Id Revision`
File size: 32.1 KB

Line
1	/*
2	* GS2Retrieve.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.service;
20
21
22	// Greenstone classes
23	import org.greenstone.gdbm.*;
24	import org.greenstone.gsdl3.util.*;
25
26	// XML classes
27	import org.w3c.dom.Document;
28	import org.w3c.dom.Element;
29	import org.w3c.dom.NodeList;
30
31	// General Java classes
32	import java.io.File;
33	import java.util.StringTokenizer;
34	import java.util.Vector;
35	import java.util.Set;
36	import java.util.Iterator;
37
38	/** Implements the generic retrieval and classifier services for GS2
39	* collections.
40	*
41	* @author <a href="mailto:[email protected]">Katherine Don</a>
42	* @author <a href="mailto:[email protected]">Michael Dewsnip</a>
43	* @version $Revision: 6275 $
44	*/
45
46	public abstract class GS2Retrieve
47	extends ServiceRack {
48
49	// the services on offer
50	// these strings must match what is found in the properties file
51	protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
52	protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
53	protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";
54
55
56	// the browsing services - now in here, these will only be advertised if classifiers have been specified in the config file
57	private static final String CLASSIFIER_SERVICE = "ClassifierBrowse";
58	private static final String CLASSIFIER_METADATA_SERVICE = "ClassifierBrowseMetadataRetrieve";
59
60	protected static final String STRUCT_PARAM = "structure";
61	protected static final String INFO_PARAM = "info";
62
63	protected static final String STRUCT_ANCESTORS = "ancestors";
64	protected static final String STRUCT_PARENT = "parent";
65	protected static final String STRUCT_SIBS = "siblings";
66	protected static final String STRUCT_CHILDREN = "children";
67	protected static final String STRUCT_DESCENDS = "descendants";
68	protected static final String STRUCT_ENTIRE = "entire";
69
70	protected static final String INFO_NUM_SIBS = "numSiblings";
71	protected static final String INFO_NUM_CHILDREN = "numChildren";
72	protected static final String INFO_SIB_POS = "siblingPosition";
73
74	protected static final int DOCUMENT=1;
75	protected static final int CLASSIFIER=2;
76
77	protected GDBMWrapper gdbm_src = null;
78	protected Element config_info = null; // the xml from the config file
79
/** Constructor: creates the GDBM wrapper; the database itself is opened in configure(). */
protected GS2Retrieve()
{
    gdbm_src = new GDBMWrapper();
}
85
86
87	/** configure this service */
88	public boolean configure(Element info, Element extra_info)
89	{
90	System.out.println("Configuring GS2Retrieve...");
91	this.config_info = info;
92
93	// set up short_service_info_ - for now just has name and type
94	Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
95	dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
96	dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
97	this.short_service_info.appendChild(dsr_service);
98
99	Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
100	dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
101	dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
102	this.short_service_info.appendChild(dmr_service);
103
104	Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
105	dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
106	dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
107	this.short_service_info.appendChild(dcr_service);
108
109	// Open GDBM database for querying
110	String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
111	if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
112	System.err.println("GS2Retrieve Error: Could not open GDBM database!");
113	return false;
114	}
115
116	// now do the classifier browse service
117
118	// check that there are classifiers specified
119	Element class_list = (Element)GSXML.getChildByTagName(info, GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
120	if (class_list == null) {
121	// no classifiers specified
122	return true;
123	}
124
125	// get the display and format elements from the coll config file for
126	// the classifiers
127	extractExtraClassifierInfo(info, extra_info);
128
129	this.config_info = info;
130
131	// short_service_info_ - the browse one
132	Element cb_service = this.doc.createElement(GSXML.SERVICE_ELEM);
133	cb_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_BROWSE);
134	cb_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_SERVICE);
135	this.short_service_info.appendChild(cb_service);
136
137	// metadata retrieval for the browsing
138	Element cbmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
139	cbmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
140	cbmr_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_METADATA_SERVICE);
141	this.short_service_info.appendChild(cbmr_service);
142
143	// the format info
144	Element cb_format_info = this.doc.createElement(GSXML.FORMAT_ELEM);
145	boolean format_found = false;
146
147	// try the default format first
148	Element def_format = (Element) GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
149	if (def_format != null) {
150	cb_format_info.appendChild(GSXML.duplicateWithNewName(this.doc, def_format, GSXML.DEFAULT_ELEM, true));
151	format_found = true;
152	}
153
154	// add in to the description a simplified list of classifiers
155	NodeList classifiers = class_list.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
156	for(int i=0; i<classifiers.getLength(); i++) {
157	Element cl = (Element)classifiers.item(i);
158	Element new_cl = (Element)this.doc.importNode(cl, false); // just import this node, not the children
159
160	// get the format info out, and put inside a classifier element
161	Element format_cl = (Element)new_cl.cloneNode(false);
162	Element format = (Element)GSXML.getChildByTagName(cl, GSXML.FORMAT_ELEM);
163	if (format != null) {
164
165	//copy all the children
166	NodeList elems = format.getChildNodes();
167	for (int j=0; j<elems.getLength();j++) {
168	format_cl.appendChild(this.doc.importNode(elems.item(j), true));
169	}
170	cb_format_info.appendChild(format_cl);
171	format_found = true;
172	}
173
174
175	}
176
177	if (format_found) {
178	this.format_info_map.put(CLASSIFIER_SERVICE, cb_format_info);
179	}
180
181	// look for document display format
182	String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
183	Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
184	if (display_format != null) {
185	this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
186	// shoudl we make a copy?
187	}
188	return true;
189	}
190
191	protected Element getServiceDescription(String service_id, String lang, String subset) {
192
193	if (service_id.equals(CLASSIFIER_SERVICE)) {
194
195	Element class_list = (Element)GSXML.getChildByTagName(this.config_info, GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
196	if (class_list == null) {
197	// no classifiers specified
198	return null;
199	}
200
201	Element cb_service = this.doc.createElement(GSXML.SERVICE_ELEM);
202	cb_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_BROWSE);
203	cb_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_SERVICE);
204	cb_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(CLASSIFIER_SERVICE+".name", lang)));
205	cb_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(CLASSIFIER_SERVICE+".description", lang)));
206
207	Element cl_list = this.doc.createElement(GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
208	cb_service.appendChild(cl_list);
209	NodeList classifiers = class_list.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
210	for(int i=0; i<classifiers.getLength(); i++) {
211	Element cl = (Element)classifiers.item(i);
212	Element new_cl = (Element)this.doc.importNode(cl, false); // just import this node, not the children
213	String content = cl.getAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
214	cl_list.appendChild(new_cl);
215	String text = GSXML.getDisplayText(cl,
216	GSXML.DISPLAY_TEXT_NAME,
217	lang, "en");
218	if (text == null \|\| text.equals("")) {
219	// no display element was specified, use the metadata name
220	// for now this looks in the class properties file
221	// this needs to use a general metadata thing instead
222	text = getMetadataNameText(content+".buttonname", lang);
223	}
224	if (text == null) {
225	text = content;
226	}
227
228	Element cl_name = GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, text);
229	new_cl.appendChild(cl_name);
230
231	// description
232
233	String meta_name = getMetadataNameText(content, lang);
234	if (meta_name==null) {
235	meta_name = content;
236	}
237	String [] array = {meta_name};
238	String description = getTextString("ClassifierBrowse.classifier_help", array, lang);
239	Element cl_desc = GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, description);
240	new_cl.appendChild(cl_desc);
241
242	}
243	return cb_service;
244	}
245
246	// these ones are probably never called, but put them here just in case
247
248	if (service_id.equals(CLASSIFIER_METADATA_SERVICE)) {
249
250	Element cbmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
251	cbmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
252	cbmr_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_METADATA_SERVICE);
253	return cbmr_service;
254	}
255
256	if (service_id.equals(DOCUMENT_STRUCTURE_RETRIEVE_SERVICE)) {
257	Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
258	dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
259	dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
260	return dsr_service;
261	}
262	if (service_id.equals(DOCUMENT_METADATA_RETRIEVE_SERVICE)) {
263	Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
264	dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
265	dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
266	return dmr_service;
267	}
268
269	if (service_id.equals(DOCUMENT_CONTENT_RETRIEVE_SERVICE)) {
270	Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
271	dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
272	dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
273	return dcr_service;
274	}
275
276	return null;
277	}
278
279	/** this looks for any classifier specific display or format info from extra_info and adds it in to the correct place in info */
280	protected boolean extractExtraClassifierInfo(Element info, Element extra_info) {
281
282	if (extra_info == null) {
283	return false;
284	}
285
286	Document owner = info.getOwnerDocument();
287	// so far we have display and format elements that we need for classifiers
288	NodeList classifiers = info.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
289	Element config_browse = (Element)GSXML.getChildByTagName(extra_info, GSXML.BROWSE_ELEM);
290
291	for (int i=0; i<classifiers.getLength();i++) {
292	Element cl = (Element)classifiers.item(i);
293	String name = cl.getAttribute(GSXML.NAME_ATT);
294	Element node_extra = GSXML.getNamedElement(config_browse,
295	GSXML.CLASSIFIER_ELEM,
296	GSXML.NAME_ATT,
297	name);
298	if (node_extra == null) {
299	System.err.println("GS2REtrieve: haven't found extra info for classifier named "+name);
300	continue;
301	}
302
303	// get the display elements if any - displayName
304	NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
305	if (display_names !=null) {
306	Element display = owner.createElement(GSXML.DISPLAY_ELEM);
307	for (int j=0; j<display_names.getLength(); j++) {
308	Element e = (Element)display_names.item(j);
309	cl.appendChild(owner.importNode(e, true));
310
311	}
312	}
313
314	// get the format element if any
315	Element format = (Element)GSXML.getChildByTagName(node_extra, GSXML.FORMAT_ELEM);
316	if (format==null) { // try a generic one that applies to all classifiers
317	format = (Element)GSXML.getChildByTagName(extra_info,
318	GSXML.FORMAT_ELEM);
319	}
320	if (format!=null) { // append to index info
321	cl.appendChild(owner.importNode(format, true));
322	}
323	} // for each classifier
324
325	// now check for default format info
326	Element default_format = (Element)GSXML.getChildByTagName(config_browse, GSXML.FORMAT_ELEM);
327	if (default_format!=null) { // append to info
328	info.appendChild(owner.importNode(default_format, true));
329	}
330
331	return true;
332	}
333
334
335	/** parent is true if this node is definitely the parent of something,
336	* child is true is it definitely is a child of something - just for efficiency purposes */
337	protected Element createDocNode(String node_id, boolean parent, boolean child) {
338
339	// create this here or pass it in?
340	DBInfo info = this.gdbm_src.getInfo(node_id);
341	Element node;
342	if (isClassifier(node_id)) {
343	node = this.doc.createElement(GSXML.CLASS_NODE_ELEM);
344	//String childtype = info.getInfo("childtype");
345	//String orientation="";
346	//if (childtype.equals("HList")) {
347	// orientation = "horizontal";
348	//} else { // assume vertical
349	// orientation = "vertical";
350	//}
351	//node.setAttribute(GSXML.CLASS_NODE_ORIENTATION_ATT, orientation);
352	} else {
353
354	node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
355
356	String top_id = OID.getTop(node_id);
357	boolean is_top = (top_id.equals(node_id) ? true : false);
358
359	String children = info.getInfo("contains");
360	boolean is_leaf = (children.equals("") ? true : false);
361
362	// set teh node type att
363	if (is_top) {
364	node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
365	} else if (is_leaf) {
366	node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
367	} else {
368	node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERIOR);
369	}
370
371	// set teh doc type att
372	if (is_top && is_leaf) { // a single section document
373	node.setAttribute(GSXML.DOC_TYPE_ATT, "simple");
374
375	} else {
376
377	if (!is_top) { // we need to look at the top info
378	info = this.gdbm_src.getInfo(top_id);
379	}
380
381	String childtype = info.getInfo("childtype");
382	if (childtype.equals("Paged")) {
383	node.setAttribute(GSXML.DOC_TYPE_ATT, "paged");
384	} else {
385	node.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
386	}
387	}
388
389	}
390	node.setAttribute(GSXML.NODE_ID_ATT, node_id);
391	return node;
392
393	}
394	/** Returns the parent of a specified documentID, or null if none exists */
395	protected Element getParent(String doc_id)
396	{
397	String parent_id = OID.getParent(doc_id);
398	if (parent_id.equals(doc_id))
399	return null;
400
401	return createDocNode(parent_id, true, false);
402	}
403
404
405	/** adds all the children of doc_id the the doc element,
406	* and if recursive=true, adds all their children as well*/
407	protected void addDescendants(Element doc, String doc_id,
408	boolean recursive)
409	{
410	DBInfo info = this.gdbm_src.getInfo(doc_id);
411	String contains = info.getInfo("contains");
412
413	StringTokenizer st = new StringTokenizer(contains, ";");
414	while (st.hasMoreTokens()) {
415	String child_id = st.nextToken().replaceAll("\"", doc_id);
416	Element child = createDocNode(child_id, false, true);
417	doc.appendChild(child);
418
419	// Apply recursively, if desired
420	if (recursive) {
421	addDescendants(child, child_id, recursive);
422	}
423
424	}
425	}
426
427	/** adds all the siblings of current_id to the parent element. */
428	protected Element addSiblings(Element parent, String parent_id, String current_id) {
429	Element current_node = (Element)parent.getFirstChild();
430	if (current_node ==null) {
431	// create a sensible error message
432	System.err.println("GS2Retrieve Error: there should be a first child.");
433	return null;
434	}
435	// remove the current child,- will add it in later in its correct place
436	parent.removeChild(current_node);
437
438	// add in all the siblings,
439	addDescendants(parent, parent_id, false);
440
441	// find the node that is now the current node
442	// this assumes that the new node that was created is the same as
443	// the old one that was removed - we may want to replace the new one
444	// with the old one.
445	Element new_current = GSXML.getNamedElement(parent, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
446	return new_current;
447
448	}
449	/** Returns true if the OID specifies a leaf node, false otherwise
450	Note: this makes a request to the GDBM database so it may not be
451	a particularly cheap operation */
452	protected boolean isLeafNode(String oid)
453	{
454	DBInfo info = this.gdbm_src.getInfo(oid);
455	String children = info.getInfo("contains");
456	return (children.equals(""));
457	}
458
459	// for now just use CL for classifiers - should have a type? in teh gdbm
460	// database.
461	protected boolean isClassifier(String oid) {
462	if (oid.startsWith("CL")) {
463	return true;
464	}
465	return false;
466	}
467
468	protected Element processDocumentStructureRetrieve(Element request) {
469	return genericStructureRetrieve(request, DOCUMENT);
470	}
471
472	protected Element processClassifierBrowse(Element request) {
473	return genericStructureRetrieve(request, CLASSIFIER);
474	}
475
476	/** Retrieve the structure of a document */
477	protected Element genericStructureRetrieve(Element request, int type)
478	{
479	// Create a new (empty) result message
480	Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
481
482	String node_name;
483	String service_name;
484	if (type==DOCUMENT) {
485	service_name = DOCUMENT_STRUCTURE_RETRIEVE_SERVICE;
486	node_name = GSXML.DOC_NODE_ELEM;
487	} else {
488	service_name = CLASSIFIER_SERVICE;
489	node_name = GSXML.CLASS_NODE_ELEM;
490	}
491
492	result.setAttribute(GSXML.FROM_ATT, service_name);
493	result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
494
495
496	Element query_doc_list = (Element) GSXML.getChildByTagName(request, node_name+GSXML.LIST_MODIFIER);
497	if (query_doc_list == null) {
498	System.err.println("GS2Retrieve Error: DocumentStructureRetrieve request specified no doc nodes.\n");
499	return result;
500	}
501
502	// Get the parameters of the request
503	Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
504	if (param_list == null) {
505	System.err.println("GS2Retrieve Error: DocumentStructureRetrieve request had no paramList.");
506	return result; // Return the empty result
507	}
508
509	// the type of info required
510	boolean want_structure = false;
511	boolean want_info = false;
512
513	Vector info_types=new Vector();
514	// The document structure information desired
515	boolean want_ancestors = false;
516	boolean want_parent = false;
517	boolean want_siblings = false;
518	boolean want_children = false;
519	boolean want_descendants = false;
520
521	boolean want_entire_structure = false;
522	// Process the request parameters
523	NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
524	for (int i=0; i<params.getLength();i++) {
525
526	Element param = (Element)params.item(i);
527	String p_name = param.getAttribute(GSXML.NAME_ATT);
528	String p_value = GSXML.getValue(param);
529	// Identify the structure information desired
530	if (p_name.equals(STRUCT_PARAM)) {
531	want_structure = true;
532
533	// This is NOT locale sensitive
534	if (p_value.equals(STRUCT_ANCESTORS))
535	want_ancestors = true;
536	else if (p_value.equals(STRUCT_PARENT))
537	want_parent = true;
538	else if (p_value.equals(STRUCT_SIBS))
539	want_siblings = true;
540	else if (p_value.equals(STRUCT_CHILDREN))
541	want_children = true;
542	else if (p_value.equals(STRUCT_DESCENDS))
543	want_descendants = true;
544	else if (p_value.equals(STRUCT_ENTIRE))
545	want_entire_structure = true;
546	else
547	System.err.println("GS2Retrieve Warning: Unknown value \"" + p_value + "\".");
548	} else if (p_name.equals(INFO_PARAM)) {
549	want_info = true;
550	info_types.add(p_value);
551	}
552	}
553
554	// Make sure there is no repeated information
555	if (want_ancestors)
556	want_parent = false;
557	if (want_descendants)
558	want_children = false;
559
560
561
562	Element doc_list = this.doc.createElement(node_name+GSXML.LIST_MODIFIER);
563	result.appendChild(doc_list);
564
565	// Get the documents
566	String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list,
567	GSXML.NODE_ID_ATT);
568	for (int i = 0; i < doc_ids.length; i++) {
569	String doc_id = doc_ids[i];
570
571	if (OID.needsTranslating(doc_id)) {
572	doc_id = this.gdbm_src.translateOID(doc_id);
573	}
574
575	// Add the document to the list
576	Element doc = this.doc.createElement(node_name);
577	doc_list.appendChild(doc);
578	doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
579
580
581	if (want_info) {
582
583	Element info_elem = this.doc.createElement("nodeStructureInfo");
584	doc.appendChild(info_elem);
585
586	for (int j=0; j<info_types.size(); j++) {
587	String info_type = (String)info_types.get(j);
588	Element inf = getInfo(doc_id, info_type);
589	if (inf != null) {
590	info_elem.appendChild(inf);
591	}
592	}
593	}
594	if (want_structure) {
595	// all structure info goes into a nodeStructure elem
596	Element structure_elem = this.doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
597	doc.appendChild(structure_elem);
598
599	if (want_entire_structure) {
600	String top_id = OID.getTop(doc_id);
601	Element top_node = createDocNode(top_id, true, false);
602	addDescendants(top_node, top_id, true);
603	structure_elem.appendChild(top_node);
604	continue; // with the next document, we dont need to do any more here
605	}
606
607	// Add the requested structure information
608	Element current = createDocNode(doc_id, false, false);
609
610	//Ancestors: continually add parent nodes until the root is reached
611	Element top_node = current; // the top node so far
612	if (want_ancestors) {
613	String current_id = doc_id;
614	while (true) {
615	Element parent = getParent(current_id);
616	if (parent == null)
617	break;
618
619	parent.appendChild(top_node);
620	current_id = parent.getAttribute(GSXML.NODE_ID_ATT);
621	top_node = parent;
622	}
623	}
624	// Parent: get the parent of the selected node
625	if (want_parent) {
626	Element parent = getParent(doc_id);
627	if (parent != null) {
628	parent.appendChild(current);
629	top_node = parent;
630	}
631	}
632
633
634	// now the top node is the root of the structure
635	structure_elem.appendChild(top_node);
636
637	//Siblings: get the other descendants of the selected node's parent
638	if (want_siblings) {
639	Element parent = (Element)current.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
640	String parent_id = OID.getParent(doc_id);
641
642	// add siblings, - returns a pointer to the new current node
643	current = addSiblings(parent, parent_id, doc_id);
644	}
645
646	// Children: get the descendants, but only one level deep
647	if (want_children)
648	addDescendants(current, doc_id, false);
649	// Descendants: recursively get every descendant of the selected node
650	if (want_descendants)
651	addDescendants(current, doc_id, true);
652	} // if want structure
653	} // for each doc
654	return result;
655	}
656
657
658	protected Element processDocumentMetadataRetrieve(Element request) {
659	return genericMetadataRetrieve(request, DOCUMENT);
660	}
661
662	protected Element processClassifierBrowseMetadataRetrieve(Element request) {
663	return genericMetadataRetrieve(request, CLASSIFIER);
664	}
665
666
667	/** Retrieve metadata associated with a document or classifier node*/
668	protected Element genericMetadataRetrieve(Element request, int type)
669	{
670	// Create a new (empty) result message
671	Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
672
673	String node_name;
674
675	String service_name;
676	if (type==DOCUMENT) {
677	service_name = DOCUMENT_METADATA_RETRIEVE_SERVICE;
678	node_name = GSXML.DOC_NODE_ELEM;
679	} else {
680	service_name = CLASSIFIER_METADATA_SERVICE;
681	node_name = GSXML.CLASS_NODE_ELEM;
682	}
683	result.setAttribute(GSXML.FROM_ATT, service_name);
684	result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
685
686	// Get the parameters of the request
687	Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
688	if (param_list == null) {
689	System.err.println("GS2Retrieve, DocumentMetadataRetrieve Error: missing paramList.\n");
690	return result; // Return the empty result
691	}
692
693	// The metadata information required
694	Vector metadata_list = new Vector();
695	boolean all_metadata = false;
696	// Process the request parameters
697	Element param = (Element) param_list.getFirstChild();
698	while (param != null) {
699	// Identify the metadata information desired
700	if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) {
701	String metadata = GSXML.getValue(param);
702	if (metadata.equals("all")) {
703	all_metadata = true;
704	break;
705	}
706	metadata_list.add(metadata);
707	}
708	param = (Element) param.getNextSibling();
709	}
710
711	Element node_list = this.doc.createElement(node_name+GSXML.LIST_MODIFIER);
712	result.appendChild(node_list);
713
714	// Get the documents
715	Element request_node_list = (Element) GSXML.getChildByTagName(request, node_name+GSXML.LIST_MODIFIER);
716	if (request_node_list == null) {
717	System.err.println("Error: DocumentMetadataRetrieve request had no "+node_name+"List.\n");
718	return result;
719	}
720
721	NodeList request_nodes = request_node_list.getChildNodes();
722	for (int i = 0; i < request_nodes.getLength(); i++) {
723	Element request_node = (Element) request_nodes.item(i);
724	String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
725
726	if (OID.needsTranslating(node_id)) {
727	node_id = this.gdbm_src.translateOID(node_id);
728	}
729
730	// Add the document to the list
731	Element new_node = (Element)this.doc.importNode(request_node, false);
732	node_list.appendChild(new_node);
733
734	// Add the requested metadata information
735	Element node_meta_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
736	new_node.appendChild(node_meta_list);
737	DBInfo info = this.gdbm_src.getInfo(node_id);
738	if (info == null) {// I have had a case where it is null!
739	continue;
740	}
741	if (all_metadata) {
742	// return everything out of the database
743	Set keys = info.getKeys();
744	Iterator it = keys.iterator();
745	while(it.hasNext()) {
746	String key = (String)it.next();
747	String value = info.getInfo(key);
748	GSXML.addMetadata(this.doc, node_meta_list, key, value);
749	}
750	} else { // just get the selected ones
751
752	for (int m = 0; m < metadata_list.size(); m++) {
753	String metadata = (String) metadata_list.get(m);
754	String value = getMetadata(node_id, info, metadata);
755	if (!value.equals("")) {
756	GSXML.addMetadata(this.doc, node_meta_list, metadata, value);
757	}
758	}
759	}
760	}
761
762	return result;
763	}
764
765	protected final char RELATION_SEP_CHAR = '_';
766	protected final String SEPARATOR_SEP_STRING = "'";
767
768	protected String getMetadata(String node_id, DBInfo info,
769	String metadata) {
770	boolean multiple = false;
771	String relation = "";
772	String separator = ", ";
773	int pos = metadata.indexOf(RELATION_SEP_CHAR);
774	if (pos ==-1) {
775	// just a plain meta entry eg dc.Title
776	return info.getInfo(metadata);
777	}
778
779	String temp = metadata.substring(0, pos);
780	metadata = metadata.substring(pos+1);
781	// check for all on the front
782	if (temp.equals("all")) {
783	multiple=true;
784	pos = metadata.indexOf(RELATION_SEP_CHAR);
785	if (pos ==-1) {
786	temp = "";
787	} else {
788	temp = metadata.substring(0, pos);
789	metadata = metadata.substring(pos+1);
790	}
791	}
792
793	// now check for relational info
794	if (temp.equals("parent") \|\| temp.equals("root") \|\| temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
795	relation = temp;
796	pos = metadata.indexOf(RELATION_SEP_CHAR);
797	if (pos == -1) {
798	temp = "";
799	} else {
800	temp = metadata.substring(0, pos);
801	metadata = metadata.substring(pos+1);
802	}
803	}
804
805	// now look for separator info
806	if (temp.startsWith(SEPARATOR_SEP_STRING) && temp.endsWith(SEPARATOR_SEP_STRING)) {
807	separator = temp.substring(1, temp.length()-1);
808
809	}
810
811	String relation_id = node_id;
812	if (relation.equals("parent") \|\| relation.equals("ancestors")) {
813	relation_id = OID.getParent(node_id);
814	// parent or ancestor does not include self
815	if (relation_id.equals(node_id)){
816	return "";
817	}
818	} else if (relation.equals("root")) {
819	relation_id = OID.getTop(node_id);
820	}
821
822	// now we either have a single node, or we have ancestors
823	DBInfo relation_info;
824	if (relation_id.equals(node_id)) {
825	relation_info = info;
826	} else {
827	relation_info = this.gdbm_src.getInfo(relation_id);
828	}
829	if (relation_info == null) {
830	return "";
831	}
832
833	StringBuffer result = new StringBuffer();
834
835	if (!multiple) {
836	result.append(relation_info.getInfo(metadata));
837	} else {
838	// we have multiple meta
839	Vector values = relation_info.getMultiInfo(metadata);
840	if (values != null) {
841	boolean first = true;
842	for (int i=0; i<values.size(); i++) {
843	if (first) {
844	first = false;
845	} else {
846	result.append(separator);
847	}
848	result.append(values.elementAt(i));
849	}
850	}
851	}
852	// if not ancestors, then this is all we do
853	if (!relation.equals("ancestors")) {
854	return result.toString();
855	}
856
857	// now do the ancestors
858	String current_id = relation_id;
859	relation_id = OID.getParent(current_id);
860	while (!relation_id.equals(current_id)) {
861	relation_info = this.gdbm_src.getInfo(relation_id);
862	if (relation_info == null) return result.toString();
863	if (!multiple) {
864	result.insert(0, separator);
865	result.insert(0, relation_info.getInfo(metadata));
866	} else {
867	Vector values = relation_info.getMultiInfo(metadata);
868	if (values != null) {
869	for (int i=values.size()-1; i>=0; i--) {
870	result.insert(0, separator);
871	result.insert(0, values.elementAt(i));
872	}
873	}
874
875	}
876	current_id = relation_id;
877	relation_id = OID.getParent(current_id);
878	}
879
880	return result.toString();
881	}
882
883	/** Retrieve the content of a document - implemented by concrete subclasses */
884	protected abstract Element processDocumentContentRetrieve(Element request);
885
886	/** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
887	protected String resolveImages(String doc_content, String doc_id)
888	{
889	String top_doc_id = OID.getTop(doc_id);
890	DBInfo info = this.gdbm_src.getInfo(top_doc_id);
891	String archivedir = info.getInfo("archivedir");
892	String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
893
894	// Resolve all "_httpdocimg_"s
895	doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
896	return doc_content;
897	}
898
899	protected Element getInfo(String doc_id, String info_type) {
900
901	String value="";
902	if (info_type.equals(INFO_NUM_SIBS)) {
903	String parent_id = OID.getParent(doc_id);
904	if (parent_id.equals(doc_id)) {
905	value="0";
906	} else {
907	value = String.valueOf(getNumChildren(parent_id));
908	}
909	} else if (info_type.equals(INFO_NUM_CHILDREN)) {
910	value = String.valueOf(getNumChildren(doc_id));
911	} else if (info_type.equals(INFO_SIB_POS)) {
912	String parent_id = OID.getParent(doc_id);
913	if (parent_id.equals(doc_id)) {
914	value="-1";
915	} else {
916	DBInfo info = this.gdbm_src.getInfo(parent_id);
917	String contains = info.getInfo("contains");
918	contains = contains.replaceAll("\"", parent_id);
919	String [] children = contains.split(";");
920	for (int i=0;i<children.length;i++) {
921	String child_id = children[i];
922	if (child_id.equals(doc_id)) {
923	value = String.valueOf(i+1); // make it from 1 to length
924	break;
925	}
926	}
927	}
928	} else {
929	return null;
930	}
931	Element info_elem = this.doc.createElement("info");
932	info_elem.setAttribute(GSXML.NAME_ATT, info_type);
933	info_elem.setAttribute(GSXML.VALUE_ATT, value);
934	return info_elem;
935	}
936
937	protected int getNumChildren(String doc_id) {
938	DBInfo info = this.gdbm_src.getInfo(doc_id);
939	String contains = info.getInfo("contains");
940	if (contains.equals("")) {
941	return 0;
942	}
943	String [] children = contains.split(";");
944	return children.length;
945	}
946
947	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: