Context Navigation

source: branches/ant-install-branch/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 9815

Last change on this file since 9815 was 9815, checked in by kjdon, 19 years ago
Some methods from the DocumentRetrieve classes now throw GSExceptions. I am trying to make it so that no exceptions reach the user interface. Many more error elements are returned too, in the hope that they may be useful to other people.
Property svn:keywords set to `Author Date Id Revision`
File size: 14.4 KB

Line
1	/*
2	* AbstractGS2DocumentRetrieve.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.GSException;
23	import org.greenstone.gsdl3.util.GSXML;
24	import org.greenstone.gsdl3.util.GSFile;
25	import org.greenstone.gsdl3.util.OID;
26	import org.greenstone.gsdl3.util.MacroResolver;
27	import org.greenstone.gsdl3.util.GS2MacroResolver;
28	import org.greenstone.gsdl3.util.GSConstants;
29	import org.greenstone.gsdl3.util.GDBMWrapper;
30	import org.greenstone.gsdl3.util.DBInfo;
31	// XML classes
32	import org.w3c.dom.Document;
33	import org.w3c.dom.Element;
34	import org.w3c.dom.NodeList;
35
36	// General Java classes
37	import java.io.File;
38	import java.util.StringTokenizer;
39	import java.util.Vector;
40	import java.util.Set;
41	import java.util.Iterator;
42	import java.util.ArrayList;
43
44	/** Implements the generic retrieval and classifier services for GS2
45	* collections.
46	*
47	* @author <a href="mailto:[email protected]">Katherine Don</a>
48	* @author <a href="mailto:[email protected]">Michael Dewsnip</a>
49	*/
50
51	public abstract class AbstractGS2DocumentRetrieve
52	extends AbstractDocumentRetrieve {
53
54	protected static final String INDEX_STEM_ELEM = "indexStem"; // tag name of the child element of the service info that configure() reads the index stem from
55
56	// protected static final String EXTLINK_PARAM = "ext"; here or in base??
57	protected String index_stem = null; // set in configure(): the indexStem element's name attribute, or the cluster name when that is missing or empty
58
59	protected GDBMWrapper gdbm_src = null; // created in the constructor; opened for reading in configure()
60
61
/**
 * Creates the GDBM wrapper used by this service and a GS2 macro resolver
 * that shares it. The database itself is only opened in configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    GDBMWrapper database = new GDBMWrapper();
    this.gdbm_src = database;
    this.macro_resolver = new GS2MacroResolver(database);
}
68
69
70	/** configure this service */
71	public boolean configure(Element info, Element extra_info)
72	{
73
74	System.out.println("Configuring AbstractGS2DocumentRetrieve...");
75	//this.config_info = info;
76
77	// Open GDBM database for querying
78	String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
79	if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
80	System.err.println("AbstractGS2DocumentRetrieve Error: Could not open GDBM database!");
81	return false;
82	}
83
84	// the index stem is either specified in the config file or is the collection name
85	Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
86	if (index_stem_elem != null) {
87	this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
88	}
89	if (this.index_stem == null \|\| this.index_stem.equals("")) {
90	System.err.println("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
91	this.index_stem = this.cluster_name;
92	}
93
94
95	return super.configure(info, extra_info);
96
97	}
98
99	/** if id ends in .fc, .pc etc, then translate it to the correct id */
100	protected String translateId(String node_id) {
101	return this.gdbm_src.translateOID(node_id);
102	}
103
104	/** if an id is not a greenstone id (an external id) then translate
105	it to a greenstone one*/
106	protected String translateExternalId(String node_id){
107	return this.gdbm_src.externalId2OID(node_id);
108	}
109
110	/** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
111	protected String getRootId(String node_id) {
112	return OID.getTop(node_id);
113	}
114	/** returns a list of the child ids in order, null if no children */
115	protected ArrayList getChildrenIds(String node_id) {
116	DBInfo info = this.gdbm_src.getInfo(node_id);
117	if (info == null) {
118	return null;
119	}
120
121	String contains = info.getInfo("contains");
122	if (contains.equals("")) {
123	return null;
124	}
125	ArrayList children = new ArrayList();
126	StringTokenizer st = new StringTokenizer(contains, ";");
127	while (st.hasMoreTokens()) {
128	String child_id = st.nextToken().replaceAll("\"", node_id);
129	children.add(child_id);
130	}
131	return children;
132
133	}
134	/** returns the node id of the parent node, null if no parent */
135	protected String getParentId(String node_id){
136	String parent = OID.getParent(node_id);
137	if (parent.equals(node_id)) {
138	return null;
139	}
140	return parent;
141	}
142
143	/** get the metadata for the classifier node node_id
144	* returns a metadataList element:
145	* <metadataList><metadata name="xxx">value</metadata></metadataList>
146	*/
147	// assumes only one value per metadata
148	protected Element getMetadataList(String node_id, boolean all_metadata,
149	ArrayList metadata_names)
150	throws GSException {
151	Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
152	DBInfo info = this.gdbm_src.getInfo(node_id);
153	if (info == null) {
154	return null;
155	}
156	String lang = "en"; // why do we need this??
157	if (all_metadata) {
158	// return everything out of the database
159	Set keys = info.getKeys();
160	Iterator it = keys.iterator();
161	while(it.hasNext()) {
162	String key = (String)it.next();
163	String value = info.getInfo(key);
164	GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(value, lang, MacroResolver.SCOPE_META, node_id));
165	}
166
167	} else {
168	for (int i=0; i<metadata_names.size(); i++) {
169	String meta_name = (String) metadata_names.get(i);
170	String value = getMetadata(node_id, info, meta_name, lang);
171	GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
172	}
173	}
174	return metadata_list;
175	}
176
177	/** returns the structural information asked for.
178	* info_type may be one of
179	* INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
180	*/
181	protected String getStructureInfo(String doc_id, String info_type) {
182	String value="";
183	if (info_type.equals(INFO_NUM_SIBS)) {
184	String parent_id = OID.getParent(doc_id);
185	if (parent_id.equals(doc_id)) {
186	value="0";
187	} else {
188	value = String.valueOf(getNumChildren(parent_id));
189	}
190	return value;
191	}
192
193	if (info_type.equals(INFO_NUM_CHILDREN)) {
194	return String.valueOf(getNumChildren(doc_id));
195	}
196
197
198	if (info_type.equals(INFO_SIB_POS)) {
199	String parent_id = OID.getParent(doc_id);
200	if (parent_id.equals(doc_id)) {
201	return "-1";
202	}
203
204	DBInfo info = this.gdbm_src.getInfo(parent_id);
205	if (info==null) {
206	return "-1";
207	}
208
209	String contains = info.getInfo("contains");
210	contains = contains.replaceAll("\"", parent_id);
211	String [] children = contains.split(";");
212	for (int i=0;i<children.length;i++) {
213	String child_id = children[i];
214	if (child_id.equals(doc_id)) {
215	return String.valueOf(i+1); // make it from 1 to length
216
217	}
218	}
219
220	return "-1";
221	} else {
222	return null;
223	}
224
225	}
226
227	protected int getNumChildren(String node_id) {
228	DBInfo info = this.gdbm_src.getInfo(node_id);
229	if (info == null) {
230	return 0;
231	}
232	String contains = info.getInfo("contains");
233	if (contains.equals("")) {
234	return 0;
235	}
236	String [] children = contains.split(";");
237	return children.length;
238	}
239
240	/** returns the document type of the doc that the specified node
241	belongs to. should be one of
242	GSXML.DOC_TYPE_SIMPLE,
243	GSXML.DOC_TYPE_PAGED,
244	GSXML.DOC_TYPE_HIERARCHY
245	*/
246	protected String getDocType(String node_id) {
247	DBInfo info = this.gdbm_src.getInfo(node_id);
248	if (info == null) {
249	return GSXML.DOC_TYPE_SIMPLE;
250	}
251	String doc_type = info.getInfo("doctype");
252	if (!doc_type.equals("")&&!doc_type.equals("doc")) {
253	return doc_type;
254	}
255
256	String top_id = OID.getTop(node_id);
257	boolean is_top = (top_id.equals(node_id) ? true : false);
258
259	String children = info.getInfo("contains");
260	boolean is_leaf = (children.equals("") ? true : false);
261
262	if (is_top && is_leaf) { // a single section document
263	return GSXML.DOC_TYPE_SIMPLE;
264	}
265
266	// now we just check the top node
267	if (!is_top) { // we need to look at the top info
268	info = this.gdbm_src.getInfo(top_id);
269	}
270	if (info == null) {
271	return GSXML.DOC_TYPE_HIERARCHY;
272	}
273
274	String childtype = info.getInfo("childtype");
275	if (childtype.equals("Paged")) {
276	return GSXML.DOC_TYPE_PAGED;
277	}
278	return GSXML.DOC_TYPE_HIERARCHY;
279	}
280
281	/** Returns the content of the node with id doc_id.
282	* Implementations should return a nodeContent element:
283	* <nodeContent>text content or other elements</nodeContent>
284	*/
285	abstract protected Element getNodeContent(String doc_id) throws GSException;
286
287	protected String getMetadata(String node_id, DBInfo info,
288	String metadata, String lang) {
289	boolean multiple = false;
290	String relation = "";
291	String separator = ", ";
292	int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
293	if (pos ==-1) {
294	// just a plain meta entry eg dc.Title
295	return macro_resolver.resolve((String)info.getInfo(metadata), lang, MacroResolver.SCOPE_META, node_id);
296	}
297
298	String temp = metadata.substring(0, pos);
299	metadata = metadata.substring(pos+1);
300	// check for all on the front
301	if (temp.equals("all")) {
302	multiple=true;
303	pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
304	if (pos ==-1) {
305	temp = "";
306	} else {
307	temp = metadata.substring(0, pos);
308	metadata = metadata.substring(pos+1);
309	}
310	}
311
312	// now check for relational info
313	if (temp.equals("parent") \|\| temp.equals("root") \|\| temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
314	relation = temp;
315	pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
316	if (pos == -1) {
317	temp = "";
318	} else {
319	temp = metadata.substring(0, pos);
320	metadata = metadata.substring(pos+1);
321	}
322	}
323
324	// now look for separator info
325	if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
326	separator = temp.substring(1, temp.length()-1);
327
328	}
329
330	String relation_id = node_id;
331	if (relation.equals("parent") \|\| relation.equals("ancestors")) {
332	relation_id = OID.getParent(node_id);
333	// parent or ancestor does not include self
334	if (relation_id.equals(node_id)){
335	return "";
336	}
337	} else if (relation.equals("root")) {
338	relation_id = OID.getTop(node_id);
339	}
340
341	// now we either have a single node, or we have ancestors
342	DBInfo relation_info;
343	if (relation_id.equals(node_id)) {
344	relation_info = info;
345	} else {
346	relation_info = this.gdbm_src.getInfo(relation_id);
347	}
348	if (relation_info == null) {
349	return "";
350	}
351
352	StringBuffer result = new StringBuffer();
353
354	if (!multiple) {
355	result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
356	} else {
357	// we have multiple meta
358	Vector values = relation_info.getMultiInfo(metadata);
359	if (values != null) {
360	boolean first = true;
361	for (int i=0; i<values.size(); i++) {
362	if (first) {
363	first = false;
364	} else {
365	result.append(separator);
366	}
367	result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
368	}
369	}
370	}
371	// if not ancestors, then this is all we do
372	if (!relation.equals("ancestors")) {
373	return result.toString();
374	}
375
376	// now do the ancestors
377	String current_id = relation_id;
378	relation_id = OID.getParent(current_id);
379	while (!relation_id.equals(current_id)) {
380	relation_info = this.gdbm_src.getInfo(relation_id);
381	if (relation_info == null) return result.toString();
382	if (!multiple) {
383	result.insert(0, separator);
384	result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
385	} else {
386	Vector values = relation_info.getMultiInfo(metadata);
387	if (values != null) {
388	for (int i=values.size()-1; i>=0; i--) {
389	result.insert(0, separator);
390	result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
391	}
392	}
393
394	}
395	current_id = relation_id;
396	relation_id = OID.getParent(current_id);
397	}
398	return result.toString();
399	}
400
401
402	/** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
403	protected String resolveTextMacros(String doc_content, String doc_id, String lang)
404	{
405	DBInfo info = null;
406	if (doc_content.indexOf("_httpdocimg_")!=-1) {
407	String top_doc_id = OID.getTop(doc_id);
408	info = this.gdbm_src.getInfo(top_doc_id);
409	if (info == null) {
410	// perhaps we had per.iods in the ids - just try the current id
411	top_doc_id = doc_id;
412	info = this.gdbm_src.getInfo(top_doc_id);
413	}
414	if (info != null) {
415	String archivedir = info.getInfo("archivedir");
416	String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
417
418	// Resolve all "_httpdocimg_"s
419	doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
420	}
421	}
422	// resolve any collection specific macros
423	doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
424	return doc_content;
425	}
426
427	protected Element getInfo(String doc_id, String info_type) {
428
429	String value="";
430	if (info_type.equals(INFO_NUM_SIBS)) {
431	String parent_id = OID.getParent(doc_id);
432	if (parent_id.equals(doc_id)) {
433	value="0";
434	} else {
435	value = String.valueOf(getNumChildren(parent_id));
436	}
437	} else if (info_type.equals(INFO_NUM_CHILDREN)) {
438	value = String.valueOf(getNumChildren(doc_id));
439	} else if (info_type.equals(INFO_SIB_POS)) {
440	String parent_id = OID.getParent(doc_id);
441	if (parent_id.equals(doc_id)) {
442	value="-1";
443	} else {
444	DBInfo info = this.gdbm_src.getInfo(parent_id);
445	if (info==null) {
446	value ="-1";
447	} else {
448	String contains = info.getInfo("contains");
449	contains = contains.replaceAll("\"", parent_id);
450	String [] children = contains.split(";");
451	for (int i=0;i<children.length;i++) {
452	String child_id = children[i];
453	if (child_id.equals(doc_id)) {
454	value = String.valueOf(i+1); // make it from 1 to length
455	break;
456	}
457	}
458	}
459	}
460	} else {
461	return null;
462	}
463	Element info_elem = this.doc.createElement("info");
464	info_elem.setAttribute(GSXML.NAME_ATT, info_type);
465	info_elem.setAttribute(GSXML.VALUE_ATT, value);
466	return info_elem;
467	}
468
469	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: