Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 25967

Last change on this file since 25967 was 25967, checked in by kjdon, 12 years ago
added new doctype pagedhierarchy
Property svn:keywords set to `Author Date Id Revision`
File size: 17.0 KB

Line
1	/*
2	* AbstractGS2DocumentRetrieve.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.GSException;
23	import org.greenstone.gsdl3.util.GSXML;
24	import org.greenstone.gsdl3.util.GSFile;
25	import org.greenstone.gsdl3.util.OID;
26	import org.greenstone.gsdl3.util.MacroResolver;
27	import org.greenstone.gsdl3.util.GS2MacroResolver;
28	import org.greenstone.gsdl3.util.GSConstants;
29	import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30	import org.greenstone.gsdl3.util.DBInfo;
31	// XML classes
32	import org.w3c.dom.Document;
33	import org.w3c.dom.Element;
34	import org.w3c.dom.NodeList;
35
36	// General Java classes
37	import java.io.File;
38	import java.util.StringTokenizer;
39	import java.util.Vector;
40	import java.util.Set;
41	import java.util.Iterator;
42	import java.util.ArrayList;
43
44	import org.apache.log4j.*;
45
46	// Apache Commons
47	import org.apache.commons.lang3.*;
48
49	/**
50	* Implements the generic retrieval and classifier services for GS2 collections.
51	*
52	* @author Katherine Don
53	* @author Michael Dewsnip
54	*/
55
56	public abstract class AbstractGS2DocumentRetrieve extends AbstractDocumentRetrieve
57	{
58
59	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
60
61	// protected static final String EXTLINK_PARAM = "ext"; here or in base??
62	protected String index_stem = null;
63
64	protected SimpleCollectionDatabase coll_db = null;
65
/**
 * Constructor: installs a GS2MacroResolver as this service's macro resolver.
 * The resolver is handed its database later, in configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    GS2MacroResolver gs2_resolver = new GS2MacroResolver();
    this.macro_resolver = gs2_resolver;
}
71
72	public void cleanUp()
73	{
74	super.cleanUp();
75	this.coll_db.closeDatabase();
76	}
77
78	/** configure this service */
79	public boolean configure(Element info, Element extra_info)
80	{
81	if (!super.configure(info, extra_info))
82	{
83	return false;
84	}
85
86	logger.info("Configuring AbstractGS2DocumentRetrieve...");
87	//this.config_info = info;
88
89	// the index stem is either specified in the config file or is the collection name
90	Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
91	if (index_stem_elem != null)
92	{
93	this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
94	}
95	if (this.index_stem == null \|\| this.index_stem.equals(""))
96	{
97	logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
98	this.index_stem = this.cluster_name;
99	}
100
101	// find out what kind of database we have
102	Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
103	String database_type = null;
104	if (database_type_elem != null)
105	{
106	database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
107	}
108	if (database_type == null \|\| database_type.equals(""))
109	{
110	database_type = "gdbm"; // the default
111	}
112	coll_db = new SimpleCollectionDatabase(database_type);
113	if (!coll_db.databaseOK())
114	{
115	logger.error("Couldn't create the collection database of type " + database_type);
116	return false;
117	}
118
119	// Open database for querying
120	String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
121	if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ))
122	{
123	logger.error("Could not open collection database!");
124	return false;
125	}
126
127	// we need to set the database for our GS2 macro resolver
128	GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver) this.macro_resolver;
129	gs2_macro_resolver.setDB(this.coll_db);
130
131	return true;
132	}
133
134	/** if id ends in .fc, .pc etc, then translate it to the correct id */
135	protected String translateId(String node_id)
136	{
137	return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
138	}
139
140	/**
141	* if an id is not a greenstone id (an external id) then translate it to a
142	* greenstone one
143	*/
144	protected String translateExternalId(String node_id)
145	{
146	return this.coll_db.externalId2OID(node_id);
147	}
148
149	/**
150	* returns the id of the root node of the document containing node node_id.
151	* . may be the same as node_id
152	*/
153	protected String getRootId(String node_id)
154	{
155	return OID.getTop(node_id);
156	}
157
158	/** returns a list of the child ids in order, null if no children */
159	protected ArrayList<String> getChildrenIds(String node_id)
160	{
161	DBInfo info = this.coll_db.getInfo(node_id);
162	if (info == null)
163	{
164	return null;
165	}
166
167	String contains = info.getInfo("contains");
168	if (contains.equals(""))
169	{
170	return null;
171	}
172	ArrayList<String> children = new ArrayList<String>();
173	StringTokenizer st = new StringTokenizer(contains, ";");
174	while (st.hasMoreTokens())
175	{
176	String child_id = StringUtils.replace(st.nextToken(), "\"", node_id);
177	children.add(child_id);
178	}
179	return children;
180
181	}
182
183	/** returns the node id of the parent node, null if no parent */
184	protected String getParentId(String node_id)
185	{
186	String parent = OID.getParent(node_id);
187	if (parent.equals(node_id))
188	{
189	return null;
190	}
191	return parent;
192	}
193
194	/**
195	* get the metadata for the classifier node node_id returns a metadataList
196	* element: <metadataList><metadata
197	* name="xxx">value</metadata></metadataList>
198	*/
199	// assumes only one value per metadata
200	protected Element getMetadataList(String node_id, boolean all_metadata, ArrayList<String> metadata_names) throws GSException
201	{
202	Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
203	DBInfo info = this.coll_db.getInfo(node_id);
204	if (info == null)
205	{
206	return null;
207	}
208	String lang = "en"; // why do we need this??
209	if (all_metadata)
210	{
211	// return everything out of the database
212	Set<String> keys = info.getKeys();
213	Iterator<String> it = keys.iterator();
214	while (it.hasNext())
215	{
216	String key = it.next();
217	//String value = info.getInfo(key);
218	Vector<String> values = info.getMultiInfo(key);
219	for (int i = 0; i < values.size(); i++)
220	{
221	GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
222	}
223	}
224
225	}
226	else
227	{
228	for (int i = 0; i < metadata_names.size(); i++)
229	{
230	String meta_name = metadata_names.get(i);
231	String value = getMetadata(node_id, info, meta_name, lang);
232	GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
233	}
234	}
235	return metadata_list;
236	}
237
238	/**
239	* returns the structural information asked for. info_type may be one of
240	* INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
241	*/
242	protected String getStructureInfo(String doc_id, String info_type)
243	{
244	String value = "";
245	if (info_type.equals(INFO_NUM_SIBS))
246	{
247	String parent_id = OID.getParent(doc_id);
248	if (parent_id.equals(doc_id))
249	{
250	value = "0";
251	}
252	else
253	{
254	value = String.valueOf(getNumChildren(parent_id));
255	}
256	return value;
257	}
258
259	if (info_type.equals(INFO_NUM_CHILDREN))
260	{
261	return String.valueOf(getNumChildren(doc_id));
262	}
263
264	if (info_type.equals(INFO_SIB_POS))
265	{
266	String parent_id = OID.getParent(doc_id);
267	if (parent_id.equals(doc_id))
268	{
269	return "-1";
270	}
271
272	DBInfo info = this.coll_db.getInfo(parent_id);
273	if (info == null)
274	{
275	return "-1";
276	}
277
278	String contains = info.getInfo("contains");
279	contains = StringUtils.replace(contains, "\"", parent_id);
280	String[] children = contains.split(";");
281	for (int i = 0; i < children.length; i++)
282	{
283	String child_id = children[i];
284	if (child_id.equals(doc_id))
285	{
286	return String.valueOf(i + 1); // make it from 1 to length
287
288	}
289	}
290
291	return "-1";
292	}
293	if (info_type.equals(INFO_DOC_TYPE))
294
295	{
296	return getDocType(doc_id);
297	}
298	return null;
299	}
300
301	protected int getNumChildren(String node_id)
302	{
303	DBInfo info = this.coll_db.getInfo(node_id);
304	if (info == null)
305	{
306	return 0;
307	}
308	String contains = info.getInfo("contains");
309	if (contains.equals(""))
310	{
311	return 0;
312	}
313	String[] children = contains.split(";");
314	return children.length;
315	}
316
317	/**
318	* returns the document type of the doc that the specified node belongs to.
319	* should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED,
320	* GSXML.DOC_TYPE_HIERARCHY
321	*/
322	protected String getDocType(String node_id)
323	{
324	DBInfo info = this.coll_db.getInfo(node_id);
325	if (info == null)
326	{
327	return GSXML.DOC_TYPE_SIMPLE;
328	}
329	String doc_type = info.getInfo("doctype");
330	if (!doc_type.equals("") && !doc_type.equals("doc"))
331	{
332	return doc_type;
333	}
334
335	String top_id = OID.getTop(node_id);
336	boolean is_top = (top_id.equals(node_id) ? true : false);
337
338	String children = info.getInfo("contains");
339	boolean is_leaf = (children.equals("") ? true : false);
340
341	if (is_top && is_leaf)
342	{ // a single section document
343	return GSXML.DOC_TYPE_SIMPLE;
344	}
345
346	// now we just check the top node
347	if (!is_top)
348	{ // we need to look at the top info
349	info = this.coll_db.getInfo(top_id);
350	}
351	if (info == null)
352	{
353	return GSXML.DOC_TYPE_HIERARCHY;
354	}
355
356	String childtype = info.getInfo("childtype");
357	if (childtype.equals("Paged"))
358	{
359	return GSXML.DOC_TYPE_PAGED;
360	}
361	if (childtype.equals("PagedHierarchy"))
362	{
363	return GSXML.DOC_TYPE_PAGED_HIERARCHY;
364	}
365	return GSXML.DOC_TYPE_HIERARCHY;
366	}
367
368	/**
369	* Returns the content of the node doc_id. Implementations should return a
370	* nodeContent element: <nodeContent>text content or other elements</nodeContent>
	* Declared abstract here, so the actual retrieval is left to subclasses.
	* NOTE(review): lang is presumably the language of the request - confirm
	* against the implementations and callers.
371	*/
372	abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
373
374	protected String getMetadata(String node_id, DBInfo info, String metadata, String lang)
375	{
376	String pos = "";
377	String relation = "";
378	String separator = ", ";
379	int index = metadata.indexOf(GSConstants.META_RELATION_SEP);
380	if (index == -1)
381	{
382	Vector<String> values = info.getMultiInfo(metadata);
383	if (values != null)
384	{
385	// just a plain meta entry eg dc.Title
386	StringBuffer result = new StringBuffer();
387	boolean first = true;
388	for (int i = 0; i < values.size(); i++)
389	{
390	if (first)
391	{
392	first = false;
393	}
394	else
395	{
396	result.append(separator);
397	}
398	result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
399	}
400	return result.toString();
401	}
402	else
403	{
404	String result = info.getInfo(metadata);
405	return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
406	}
407	}
408
409	String temp = metadata.substring(0, index);
410	metadata = metadata.substring(index + 1);
411	// check for pos on the front, indicating which piece of meta the user wants
412	// pos can be "first", "last" or the position value of the requested piece of metadata
413	if (temp.startsWith(GSConstants.META_POS))
414	{
415	temp = temp.substring(GSConstants.META_POS.length());
416	pos = temp;
417
418	index = metadata.indexOf(GSConstants.META_RELATION_SEP);
419	if (index == -1)
420	{
421	temp = "";
422	}
423	else
424	{
425	temp = metadata.substring(0, index);
426	metadata = metadata.substring(index + 1);
427	}
428	}
429
430	// now check for relational info
431	if (temp.equals("parent") \|\| temp.equals("root") \|\| temp.equals("ancestors")
432	\|\| temp.equals("siblings") \|\| temp.equals("children") \|\| temp.equals("descendants"))
433	{ // "current" "siblings" "children" "descendants"
434	// gets all siblings by default
435	relation = temp;
436	index = metadata.indexOf(GSConstants.META_RELATION_SEP);
437	if (index == -1)
438	{
439	temp = "";
440	}
441	else
442	{
443	temp = metadata.substring(0, index);
444	metadata = metadata.substring(index + 1);
445	}
446	}
447
448	// now look for separator info
449	if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP))
450	{
451	separator = temp.substring(1, temp.length() - 1);
452
453	}
454
455	String relation_id = node_id;
456	if (relation.equals("parent") \|\| relation.equals("ancestors"))
457	{
458	relation_id = OID.getParent(node_id);
459	// parent or ancestor does not include self
460	if (relation_id.equals(node_id))
461	{
462	return "";
463	}
464	}
465	else if (relation.equals("root"))
466	{
467	relation_id = OID.getTop(node_id);
468	}
469
470	// now we either have a single node, or we have ancestors
471	DBInfo relation_info;
472	if (relation_id.equals(node_id))
473	{
474	relation_info = info;
475	}
476	else
477	{
478	relation_info = this.coll_db.getInfo(relation_id);
479	}
480	if (relation_info == null)
481	{
482	return "";
483	}
484
485	StringBuffer result = new StringBuffer();
486
487	Vector<String> values = relation_info.getMultiInfo(metadata);
488
489	if (!pos.equals("")) // if a particular position was specified, so not multiple values for the metadata
490	{
491	String meta = "";
492	if (values != null) {
493	if(pos.equals(GSConstants.META_FIRST)) {
494	meta = values.firstElement();
495	} else if(pos.equals(GSConstants.META_LAST)) {
496	meta = values.lastElement();
497	} else {
498	int position = Integer.parseInt(pos);
499	if(position < values.size()) {
500	meta = values.elementAt(position);
501	}
502	}
503	} // else ""
504
505	result.append(this.macro_resolver.resolve(meta, lang, MacroResolver.SCOPE_META, relation_id));
506	}
507	else
508	{
509	if (values != null)
510	{
511	boolean first = true;
512	for (int i = 0; i < values.size(); i++)
513	{
514	if (first)
515	{
516	first = false;
517	}
518	else
519	{
520	result.append(separator);
521	}
522	result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
523	}
524	}
525	logger.info(result);
526	}
527	// if not ancestors, then this is all we do
528	if (!relation.equals("ancestors"))
529	{
530	return result.toString();
531	}
532
533	// now do the ancestors
534	String current_id = relation_id;
535	relation_id = OID.getParent(current_id);
536	while (!relation_id.equals(current_id))
537	{
538	relation_info = this.coll_db.getInfo(relation_id);
539	if (relation_info == null)
540	return result.toString();
541
542	values = relation_info.getMultiInfo(metadata);
543	if (!pos.equals("")) // if a particular position was specified, so not multiple values for the metadata
544	{
545	String meta = "";
546	if (values != null) {
547	if(pos.equals(GSConstants.META_FIRST)) {
548	meta = values.firstElement();
549	} else if(pos.equals(GSConstants.META_LAST)) {
550	meta = values.lastElement();
551	} else {
552	int position = Integer.parseInt(pos);
553	if(position < values.size()) {
554	meta = values.elementAt(position);
555	}
556	}
557	} // else ""
558
559	result.insert(0, separator);
560	result.insert(0, this.macro_resolver.resolve(meta, lang, MacroResolver.SCOPE_META, relation_id));
561	}
562	else
563	{
564	if (values != null)
565	{
566	for (int i = values.size() - 1; i >= 0; i--)
567	{
568	result.insert(0, separator);
569	result.insert(0, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
570	}
571	}
572
573	}
574	current_id = relation_id;
575	relation_id = OID.getParent(current_id);
576	}
577	return result.toString();
578	}
579
580	/**
581	* needs to get info from collection database - if the calling code gets it
582	* already it may pay to pass it in instead
583	*/
584	protected String resolveTextMacros(String doc_content, String doc_id, String lang)
585	{
586	// resolve any collection specific macros
587	doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
588	return doc_content;
589	}
590
591	protected Element getInfo(String doc_id, String info_type)
592	{
593
594	String value = "";
595	if (info_type.equals(INFO_NUM_SIBS))
596	{
597	String parent_id = OID.getParent(doc_id);
598	if (parent_id.equals(doc_id))
599	{
600	value = "0";
601	}
602	else
603	{
604	value = String.valueOf(getNumChildren(parent_id));
605	}
606	}
607	else if (info_type.equals(INFO_NUM_CHILDREN))
608	{
609	value = String.valueOf(getNumChildren(doc_id));
610	}
611	else if (info_type.equals(INFO_SIB_POS))
612	{
613	String parent_id = OID.getParent(doc_id);
614	if (parent_id.equals(doc_id))
615	{
616	value = "-1";
617	}
618	else
619	{
620	DBInfo info = this.coll_db.getInfo(parent_id);
621	if (info == null)
622	{
623	value = "-1";
624	}
625	else
626	{
627	String contains = info.getInfo("contains");
628	contains = StringUtils.replace(contains, "\"", parent_id);
629	String[] children = contains.split(";");
630	for (int i = 0; i < children.length; i++)
631	{
632	String child_id = children[i];
633	if (child_id.equals(doc_id))
634	{
635	value = String.valueOf(i + 1); // make it from 1 to length
636	break;
637	}
638	}
639	}
640	}
641	}
642	else
643	{
644	return null;
645	}
646	Element info_elem = this.doc.createElement("info");
647	info_elem.setAttribute(GSXML.NAME_ATT, info_type);
648	info_elem.setAttribute(GSXML.VALUE_ATT, value);
649	return info_elem;
650	}
651
652	protected String getHrefOID(String href_url)
653	{
654	return this.coll_db.docnum2OID(href_url);
655	}
656
657	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: