Context Navigation

source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 15208

Last change on this file since 15208 was 15208, checked in by kjdon, 16 years ago
changed descendents to descendants to fit with other code (only in a comment, but may help later on)
Property svn:keywords set to `Author Date Id Revision`
File size: 14.8 KB

Line
1	/*
2	* AbstractGS2DocumentRetrieve.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.GSException;
23	import org.greenstone.gsdl3.util.GSXML;
24	import org.greenstone.gsdl3.util.GSFile;
25	import org.greenstone.gsdl3.util.OID;
26	import org.greenstone.gsdl3.util.MacroResolver;
27	import org.greenstone.gsdl3.util.GS2MacroResolver;
28	import org.greenstone.gsdl3.util.GSConstants;
29	import org.greenstone.gsdl3.util.GDBMWrapper;
30	import org.greenstone.gsdl3.util.DBInfo;
31	// XML classes
32	import org.w3c.dom.Document;
33	import org.w3c.dom.Element;
34	import org.w3c.dom.NodeList;
35
36	// General Java classes
37	import java.io.File;
38	import java.util.StringTokenizer;
39	import java.util.Vector;
40	import java.util.Set;
41	import java.util.Iterator;
42	import java.util.ArrayList;
43
44	import org.apache.log4j.*;
45
46	/** Implements the generic retrieval and classifier services for GS2
47	* collections.
48	*
49	* @author <a href="mailto:[email protected]">Katherine Don</a>
50	* @author <a href="mailto:[email protected]">Michael Dewsnip</a>
51	*/
52
53	public abstract class AbstractGS2DocumentRetrieve
54	extends AbstractDocumentRetrieve {
55
/** log4j logger for this class, named after the fully qualified class name */
static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());

// protected static final String EXTLINK_PARAM = "ext"; here or in base??
/** index stem: assigned in configure() (from the config element, or else the
 *  cluster name) and passed to GSFile.GDBMDatabaseFile() to find the database file */
protected String index_stem = null;

/** wrapper around the GDBM database: created in the constructor, opened in
 *  configure() and closed in cleanUp() */
protected GDBMWrapper gdbm_src = null;
62
63
/**
 * Creates the GDBM wrapper and a GS2 macro resolver that is handed that same
 * wrapper. The database is opened later, in configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    GDBMWrapper database = new GDBMWrapper();
    this.gdbm_src = database;
    this.macro_resolver = new GS2MacroResolver(database);
}
70
71	public void cleanUp() {
72	super.cleanUp();
73	this.gdbm_src.closeDatabase();
74	}
75	/** configure this service */
76	public boolean configure(Element info, Element extra_info)
77	{
78	if (!super.configure(info, extra_info)){
79	return false;
80	}
81
82	logger.info("Configuring AbstractGS2DocumentRetrieve...");
83	//this.config_info = info;
84
85	// the index stem is either specified in the config file or is the collection name
86	Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
87	if (index_stem_elem != null) {
88	this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
89	}
90	if (this.index_stem == null \|\| this.index_stem.equals("")) {
91	logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
92	this.index_stem = this.cluster_name;
93	}
94
95	// Open GDBM database for querying
96	String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name, this.index_stem);
97	if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
98	logger.error("Could not open GDBM database!");
99	return false;
100	}
101
102	return true;
103	}
104
105	/** if id ends in .fc, .pc etc, then translate it to the correct id */
106	protected String translateId(String node_id) {
107	return this.gdbm_src.translateOID(node_id);
108	}
109
110	/** if an id is not a greenstone id (an external id) then translate
111	it to a greenstone one*/
112	protected String translateExternalId(String node_id){
113	return this.gdbm_src.externalId2OID(node_id);
114	}
115
116	/** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
117	protected String getRootId(String node_id) {
118	return OID.getTop(node_id);
119	}
120	/** returns a list of the child ids in order, null if no children */
121	protected ArrayList getChildrenIds(String node_id) {
122	DBInfo info = this.gdbm_src.getInfo(node_id);
123	if (info == null) {
124	return null;
125	}
126
127	String contains = info.getInfo("contains");
128	if (contains.equals("")) {
129	return null;
130	}
131	ArrayList children = new ArrayList();
132	StringTokenizer st = new StringTokenizer(contains, ";");
133	while (st.hasMoreTokens()) {
134	String child_id = st.nextToken().replaceAll("\"", node_id);
135	children.add(child_id);
136	}
137	return children;
138
139	}
140	/** returns the node id of the parent node, null if no parent */
141	protected String getParentId(String node_id){
142	String parent = OID.getParent(node_id);
143	if (parent.equals(node_id)) {
144	return null;
145	}
146	return parent;
147	}
148
149	/** get the metadata for the classifier node node_id
150	* returns a metadataList element:
151	* <metadataList><metadata name="xxx">value</metadata></metadataList>
152	*/
153	// assumes only one value per metadata
154	protected Element getMetadataList(String node_id, boolean all_metadata,
155	ArrayList metadata_names)
156	throws GSException {
157	Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
158	DBInfo info = this.gdbm_src.getInfo(node_id);
159	if (info == null) {
160	return null;
161	}
162	String lang = "en"; // why do we need this??
163	if (all_metadata) {
164	// return everything out of the database
165	Set keys = info.getKeys();
166	Iterator it = keys.iterator();
167	while(it.hasNext()) {
168	String key = (String)it.next();
169	//String value = info.getInfo(key);
170	Vector values = info.getMultiInfo(key);
171	for(int i=0; i<values.size(); i++) {
172	GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
173	}
174	}
175
176	} else {
177	for (int i=0; i<metadata_names.size(); i++) {
178	String meta_name = (String) metadata_names.get(i);
179	String value = getMetadata(node_id, info, meta_name, lang);
180	GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
181	}
182	}
183	return metadata_list;
184	}
185
186	/** returns the structural information asked for.
187	* info_type may be one of
188	* INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
189	*/
190	protected String getStructureInfo(String doc_id, String info_type) {
191	String value="";
192	if (info_type.equals(INFO_NUM_SIBS)) {
193	String parent_id = OID.getParent(doc_id);
194	if (parent_id.equals(doc_id)) {
195	value="0";
196	} else {
197	value = String.valueOf(getNumChildren(parent_id));
198	}
199	return value;
200	}
201
202	if (info_type.equals(INFO_NUM_CHILDREN)) {
203	return String.valueOf(getNumChildren(doc_id));
204	}
205
206
207	if (info_type.equals(INFO_SIB_POS)) {
208	String parent_id = OID.getParent(doc_id);
209	if (parent_id.equals(doc_id)) {
210	return "-1";
211	}
212
213	DBInfo info = this.gdbm_src.getInfo(parent_id);
214	if (info==null) {
215	return "-1";
216	}
217
218	String contains = info.getInfo("contains");
219	contains = contains.replaceAll("\"", parent_id);
220	String [] children = contains.split(";");
221	for (int i=0;i<children.length;i++) {
222	String child_id = children[i];
223	if (child_id.equals(doc_id)) {
224	return String.valueOf(i+1); // make it from 1 to length
225
226	}
227	}
228
229	return "-1";
230	} else {
231	return null;
232	}
233
234	}
235
236	protected int getNumChildren(String node_id) {
237	DBInfo info = this.gdbm_src.getInfo(node_id);
238	if (info == null) {
239	return 0;
240	}
241	String contains = info.getInfo("contains");
242	if (contains.equals("")) {
243	return 0;
244	}
245	String [] children = contains.split(";");
246	return children.length;
247	}
248
249	/** returns the document type of the doc that the specified node
250	belongs to. should be one of
251	GSXML.DOC_TYPE_SIMPLE,
252	GSXML.DOC_TYPE_PAGED,
253	GSXML.DOC_TYPE_HIERARCHY
254	*/
255	protected String getDocType(String node_id) {
256	DBInfo info = this.gdbm_src.getInfo(node_id);
257	if (info == null) {
258	return GSXML.DOC_TYPE_SIMPLE;
259	}
260	String doc_type = info.getInfo("doctype");
261	if (!doc_type.equals("")&&!doc_type.equals("doc")) {
262	return doc_type;
263	}
264
265	String top_id = OID.getTop(node_id);
266	boolean is_top = (top_id.equals(node_id) ? true : false);
267
268	String children = info.getInfo("contains");
269	boolean is_leaf = (children.equals("") ? true : false);
270
271	if (is_top && is_leaf) { // a single section document
272	return GSXML.DOC_TYPE_SIMPLE;
273	}
274
275	// now we just check the top node
276	if (!is_top) { // we need to look at the top info
277	info = this.gdbm_src.getInfo(top_id);
278	}
279	if (info == null) {
280	return GSXML.DOC_TYPE_HIERARCHY;
281	}
282
283	String childtype = info.getInfo("childtype");
284	if (childtype.equals("Paged")) {
285	return GSXML.DOC_TYPE_PAGED;
286	}
287	return GSXML.DOC_TYPE_HIERARCHY;
288	}
289
/** returns the content of a node
 * should return a nodeContent element:
 * &lt;nodeContent&gt;text content or other elements&lt;/nodeContent&gt;
 *
 * Abstract: this class does not retrieve node content itself, so each
 * concrete subclass has to implement it.
 *
 * @param doc_id id of the node whose content is wanted
 * @param lang   language code for the request (how it is used is up to
 *               the implementation - confirm against subclasses)
 * @return the nodeContent element
 * @throws GSException declared for implementations to signal failure
 */
abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
295
296	protected String getMetadata(String node_id, DBInfo info,
297	String metadata, String lang) {
298	boolean multiple = false;
299	String relation = "";
300	String separator = ", ";
301	int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
302	if (pos ==-1) {
303	Vector values = info.getMultiInfo(metadata);
304	if (values !=null){
305	// just a plain meta entry eg dc.Title
306	StringBuffer result = new StringBuffer();
307	boolean first = true;
308	for (int i=0; i<values.size(); i++) {
309	if (first) {
310	first = false;
311	} else {
312	result.append(separator);
313	}
314	result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
315	}
316	return result.toString();
317	}
318	else{
319	String result = info.getInfo(metadata);
320	return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
321	}
322	}
323
324	String temp = metadata.substring(0, pos);
325	metadata = metadata.substring(pos+1);
326	// check for all on the front
327	if (temp.equals("all")) {
328	multiple=true;
329	pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
330	if (pos ==-1) {
331	temp = "";
332	} else {
333	temp = metadata.substring(0, pos);
334	metadata = metadata.substring(pos+1);
335	}
336	}
337
338	// now check for relational info
339	if (temp.equals("parent") \|\| temp.equals("root") \|\| temp.equals( "ancestors")) { // "current" "siblings" "children" "descendants"
340	relation = temp;
341	pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
342	if (pos == -1) {
343	temp = "";
344	} else {
345	temp = metadata.substring(0, pos);
346	metadata = metadata.substring(pos+1);
347	}
348	}
349
350	// now look for separator info
351	if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
352	separator = temp.substring(1, temp.length()-1);
353
354	}
355
356	String relation_id = node_id;
357	if (relation.equals("parent") \|\| relation.equals("ancestors")) {
358	relation_id = OID.getParent(node_id);
359	// parent or ancestor does not include self
360	if (relation_id.equals(node_id)){
361	return "";
362	}
363	} else if (relation.equals("root")) {
364	relation_id = OID.getTop(node_id);
365	}
366
367	// now we either have a single node, or we have ancestors
368	DBInfo relation_info;
369	if (relation_id.equals(node_id)) {
370	relation_info = info;
371	} else {
372	relation_info = this.gdbm_src.getInfo(relation_id);
373	}
374	if (relation_info == null) {
375	return "";
376	}
377
378	StringBuffer result = new StringBuffer();
379
380	if (!multiple) {
381	result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
382	} else {
383	// we have multiple meta
384	Vector values = relation_info.getMultiInfo(metadata);
385	if (values != null) {
386	boolean first = true;
387	for (int i=0; i<values.size(); i++) {
388	if (first) {
389	first = false;
390	} else {
391	result.append(separator);
392	}
393	result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
394	}
395	}
396	logger.info(result);
397	}
398	// if not ancestors, then this is all we do
399	if (!relation.equals("ancestors")) {
400	return result.toString();
401	}
402
403	// now do the ancestors
404	String current_id = relation_id;
405	relation_id = OID.getParent(current_id);
406	while (!relation_id.equals(current_id)) {
407	relation_info = this.gdbm_src.getInfo(relation_id);
408	if (relation_info == null) return result.toString();
409	if (!multiple) {
410	result.insert(0, separator);
411	result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
412	} else {
413	Vector values = relation_info.getMultiInfo(metadata);
414	if (values != null) {
415	for (int i=values.size()-1; i>=0; i--) {
416	result.insert(0, separator);
417	result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
418	}
419	}
420
421	}
422	current_id = relation_id;
423	relation_id = OID.getParent(current_id);
424	}
425	return result.toString();
426	}
427
428
429	/** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
430	protected String resolveTextMacros(String doc_content, String doc_id, String lang)
431	{
432	// resolve any collection specific macros
433	doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
434	return doc_content;
435	}
436
437	protected Element getInfo(String doc_id, String info_type) {
438
439	String value="";
440	if (info_type.equals(INFO_NUM_SIBS)) {
441	String parent_id = OID.getParent(doc_id);
442	if (parent_id.equals(doc_id)) {
443	value="0";
444	} else {
445	value = String.valueOf(getNumChildren(parent_id));
446	}
447	} else if (info_type.equals(INFO_NUM_CHILDREN)) {
448	value = String.valueOf(getNumChildren(doc_id));
449	} else if (info_type.equals(INFO_SIB_POS)) {
450	String parent_id = OID.getParent(doc_id);
451	if (parent_id.equals(doc_id)) {
452	value="-1";
453	} else {
454	DBInfo info = this.gdbm_src.getInfo(parent_id);
455	if (info==null) {
456	value ="-1";
457	} else {
458	String contains = info.getInfo("contains");
459	contains = contains.replaceAll("\"", parent_id);
460	String [] children = contains.split(";");
461	for (int i=0;i<children.length;i++) {
462	String child_id = children[i];
463	if (child_id.equals(doc_id)) {
464	value = String.valueOf(i+1); // make it from 1 to length
465	break;
466	}
467	}
468	}
469	}
470	} else {
471	return null;
472	}
473	Element info_elem = this.doc.createElement("info");
474	info_elem.setAttribute(GSXML.NAME_ATT, info_type);
475	info_elem.setAttribute(GSXML.VALUE_ATT, value);
476	return info_elem;
477	}
478
479	protected String getHrefOID(String href_url){
480	return this.gdbm_src.docnum2OID(href_url);
481	}
482
483	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: