Context Navigation

source: branches/ant-install-branch/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 9824

Last change on this file since 9824 was 9824, checked in by kjdon, 19 years ago
When a collection (using GDBM) is opened by Tomcat, Windows holds a lock on the GDBM file, so you can't rebuild it. ModuleInterface has been modified to have a cleanUp method, so all modules need to implement it. The mg/mgpp and gdbm modules now unload the index data or close the connection to the database in cleanUp, so cleanUp should be called whenever you deactivate a module.
Property svn:keywords set to `Author Date Id Revision`
File size: 14.5 KB

Line
1	/*
2	* AbstractGS2DocumentRetrieve.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.GSException;
23	import org.greenstone.gsdl3.util.GSXML;
24	import org.greenstone.gsdl3.util.GSFile;
25	import org.greenstone.gsdl3.util.OID;
26	import org.greenstone.gsdl3.util.MacroResolver;
27	import org.greenstone.gsdl3.util.GS2MacroResolver;
28	import org.greenstone.gsdl3.util.GSConstants;
29	import org.greenstone.gsdl3.util.GDBMWrapper;
30	import org.greenstone.gsdl3.util.DBInfo;
31	// XML classes
32	import org.w3c.dom.Document;
33	import org.w3c.dom.Element;
34	import org.w3c.dom.NodeList;
35
36	// General Java classes
37	import java.io.File;
38	import java.util.StringTokenizer;
39	import java.util.Vector;
40	import java.util.Set;
41	import java.util.Iterator;
42	import java.util.ArrayList;
43
44	/** Implements the generic retrieval and classifier services for GS2
45	* collections.
46	*
47	* @author <a href="mailto:[email protected]">Katherine Don</a>
48	* @author <a href="mailto:[email protected]">Michael Dewsnip</a>
49	*/
50
51	public abstract class AbstractGS2DocumentRetrieve
52	extends AbstractDocumentRetrieve {
53
54	protected static final String INDEX_STEM_ELEM = "indexStem";
55
56	// protected static final String EXTLINK_PARAM = "ext"; here or in base??
57	protected String index_stem = null;
58
59	protected GDBMWrapper gdbm_src = null;
60
61
/**
 * Creates the service with a fresh GDBM wrapper and a GS2 macro resolver
 * that is handed that same wrapper. The database itself is not opened
 * here; that happens in configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    GDBMWrapper wrapper = new GDBMWrapper();
    this.gdbm_src = wrapper;
    this.macro_resolver = new GS2MacroResolver(wrapper);
}
68
69	public void cleanUp() {
70	super.cleanUp();
71	this.gdbm_src.closeDatabase();
72	}
73	/** configure this service */
74	public boolean configure(Element info, Element extra_info)
75	{
76
77	System.out.println("Configuring AbstractGS2DocumentRetrieve...");
78	//this.config_info = info;
79
80	// Open GDBM database for querying
81	String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
82	if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
83	System.err.println("AbstractGS2DocumentRetrieve Error: Could not open GDBM database!");
84	return false;
85	}
86
87	// the index stem is either specified in the config file or is the collection name
88	Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
89	if (index_stem_elem != null) {
90	this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
91	}
92	if (this.index_stem == null \|\| this.index_stem.equals("")) {
93	System.err.println("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
94	this.index_stem = this.cluster_name;
95	}
96
97
98	return super.configure(info, extra_info);
99
100	}
101
102	/** if id ends in .fc, .pc etc, then translate it to the correct id */
103	protected String translateId(String node_id) {
104	return this.gdbm_src.translateOID(node_id);
105	}
106
107	/** if an id is not a greenstone id (an external id) then translate
108	it to a greenstone one*/
109	protected String translateExternalId(String node_id){
110	return this.gdbm_src.externalId2OID(node_id);
111	}
112
113	/** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
114	protected String getRootId(String node_id) {
115	return OID.getTop(node_id);
116	}
117	/** returns a list of the child ids in order, null if no children */
118	protected ArrayList getChildrenIds(String node_id) {
119	DBInfo info = this.gdbm_src.getInfo(node_id);
120	if (info == null) {
121	return null;
122	}
123
124	String contains = info.getInfo("contains");
125	if (contains.equals("")) {
126	return null;
127	}
128	ArrayList children = new ArrayList();
129	StringTokenizer st = new StringTokenizer(contains, ";");
130	while (st.hasMoreTokens()) {
131	String child_id = st.nextToken().replaceAll("\"", node_id);
132	children.add(child_id);
133	}
134	return children;
135
136	}
137	/** returns the node id of the parent node, null if no parent */
138	protected String getParentId(String node_id){
139	String parent = OID.getParent(node_id);
140	if (parent.equals(node_id)) {
141	return null;
142	}
143	return parent;
144	}
145
146	/** get the metadata for the classifier node node_id
147	* returns a metadataList element:
148	* <metadataList><metadata name="xxx">value</metadata></metadataList>
149	*/
150	// assumes only one value per metadata
151	protected Element getMetadataList(String node_id, boolean all_metadata,
152	ArrayList metadata_names)
153	throws GSException {
154	Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
155	DBInfo info = this.gdbm_src.getInfo(node_id);
156	if (info == null) {
157	return null;
158	}
159	String lang = "en"; // why do we need this??
160	if (all_metadata) {
161	// return everything out of the database
162	Set keys = info.getKeys();
163	Iterator it = keys.iterator();
164	while(it.hasNext()) {
165	String key = (String)it.next();
166	String value = info.getInfo(key);
167	GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(value, lang, MacroResolver.SCOPE_META, node_id));
168	}
169
170	} else {
171	for (int i=0; i<metadata_names.size(); i++) {
172	String meta_name = (String) metadata_names.get(i);
173	String value = getMetadata(node_id, info, meta_name, lang);
174	GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
175	}
176	}
177	return metadata_list;
178	}
179
180	/** returns the structural information asked for.
181	* info_type may be one of
182	* INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
183	*/
184	protected String getStructureInfo(String doc_id, String info_type) {
185	String value="";
186	if (info_type.equals(INFO_NUM_SIBS)) {
187	String parent_id = OID.getParent(doc_id);
188	if (parent_id.equals(doc_id)) {
189	value="0";
190	} else {
191	value = String.valueOf(getNumChildren(parent_id));
192	}
193	return value;
194	}
195
196	if (info_type.equals(INFO_NUM_CHILDREN)) {
197	return String.valueOf(getNumChildren(doc_id));
198	}
199
200
201	if (info_type.equals(INFO_SIB_POS)) {
202	String parent_id = OID.getParent(doc_id);
203	if (parent_id.equals(doc_id)) {
204	return "-1";
205	}
206
207	DBInfo info = this.gdbm_src.getInfo(parent_id);
208	if (info==null) {
209	return "-1";
210	}
211
212	String contains = info.getInfo("contains");
213	contains = contains.replaceAll("\"", parent_id);
214	String [] children = contains.split(";");
215	for (int i=0;i<children.length;i++) {
216	String child_id = children[i];
217	if (child_id.equals(doc_id)) {
218	return String.valueOf(i+1); // make it from 1 to length
219
220	}
221	}
222
223	return "-1";
224	} else {
225	return null;
226	}
227
228	}
229
230	protected int getNumChildren(String node_id) {
231	DBInfo info = this.gdbm_src.getInfo(node_id);
232	if (info == null) {
233	return 0;
234	}
235	String contains = info.getInfo("contains");
236	if (contains.equals("")) {
237	return 0;
238	}
239	String [] children = contains.split(";");
240	return children.length;
241	}
242
243	/** returns the document type of the doc that the specified node
244	belongs to. should be one of
245	GSXML.DOC_TYPE_SIMPLE,
246	GSXML.DOC_TYPE_PAGED,
247	GSXML.DOC_TYPE_HIERARCHY
248	*/
249	protected String getDocType(String node_id) {
250	DBInfo info = this.gdbm_src.getInfo(node_id);
251	if (info == null) {
252	return GSXML.DOC_TYPE_SIMPLE;
253	}
254	String doc_type = info.getInfo("doctype");
255	if (!doc_type.equals("")&&!doc_type.equals("doc")) {
256	return doc_type;
257	}
258
259	String top_id = OID.getTop(node_id);
260	boolean is_top = (top_id.equals(node_id) ? true : false);
261
262	String children = info.getInfo("contains");
263	boolean is_leaf = (children.equals("") ? true : false);
264
265	if (is_top && is_leaf) { // a single section document
266	return GSXML.DOC_TYPE_SIMPLE;
267	}
268
269	// now we just check the top node
270	if (!is_top) { // we need to look at the top info
271	info = this.gdbm_src.getInfo(top_id);
272	}
273	if (info == null) {
274	return GSXML.DOC_TYPE_HIERARCHY;
275	}
276
277	String childtype = info.getInfo("childtype");
278	if (childtype.equals("Paged")) {
279	return GSXML.DOC_TYPE_PAGED;
280	}
281	return GSXML.DOC_TYPE_HIERARCHY;
282	}
283
284	/** returns the content of a node
285	* should return a nodeContent element:
286	* <nodeContent>text content or other elements</nodeContent>
287	*/
288	abstract protected Element getNodeContent(String doc_id) throws GSException;
289
290	protected String getMetadata(String node_id, DBInfo info,
291	String metadata, String lang) {
292	boolean multiple = false;
293	String relation = "";
294	String separator = ", ";
295	int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
296	if (pos ==-1) {
297	// just a plain meta entry eg dc.Title
298	return macro_resolver.resolve((String)info.getInfo(metadata), lang, MacroResolver.SCOPE_META, node_id);
299	}
300
301	String temp = metadata.substring(0, pos);
302	metadata = metadata.substring(pos+1);
303	// check for all on the front
304	if (temp.equals("all")) {
305	multiple=true;
306	pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
307	if (pos ==-1) {
308	temp = "";
309	} else {
310	temp = metadata.substring(0, pos);
311	metadata = metadata.substring(pos+1);
312	}
313	}
314
315	// now check for relational info
316	if (temp.equals("parent") \|\| temp.equals("root") \|\| temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
317	relation = temp;
318	pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
319	if (pos == -1) {
320	temp = "";
321	} else {
322	temp = metadata.substring(0, pos);
323	metadata = metadata.substring(pos+1);
324	}
325	}
326
327	// now look for separator info
328	if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
329	separator = temp.substring(1, temp.length()-1);
330
331	}
332
333	String relation_id = node_id;
334	if (relation.equals("parent") \|\| relation.equals("ancestors")) {
335	relation_id = OID.getParent(node_id);
336	// parent or ancestor does not include self
337	if (relation_id.equals(node_id)){
338	return "";
339	}
340	} else if (relation.equals("root")) {
341	relation_id = OID.getTop(node_id);
342	}
343
344	// now we either have a single node, or we have ancestors
345	DBInfo relation_info;
346	if (relation_id.equals(node_id)) {
347	relation_info = info;
348	} else {
349	relation_info = this.gdbm_src.getInfo(relation_id);
350	}
351	if (relation_info == null) {
352	return "";
353	}
354
355	StringBuffer result = new StringBuffer();
356
357	if (!multiple) {
358	result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
359	} else {
360	// we have multiple meta
361	Vector values = relation_info.getMultiInfo(metadata);
362	if (values != null) {
363	boolean first = true;
364	for (int i=0; i<values.size(); i++) {
365	if (first) {
366	first = false;
367	} else {
368	result.append(separator);
369	}
370	result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
371	}
372	}
373	}
374	// if not ancestors, then this is all we do
375	if (!relation.equals("ancestors")) {
376	return result.toString();
377	}
378
379	// now do the ancestors
380	String current_id = relation_id;
381	relation_id = OID.getParent(current_id);
382	while (!relation_id.equals(current_id)) {
383	relation_info = this.gdbm_src.getInfo(relation_id);
384	if (relation_info == null) return result.toString();
385	if (!multiple) {
386	result.insert(0, separator);
387	result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
388	} else {
389	Vector values = relation_info.getMultiInfo(metadata);
390	if (values != null) {
391	for (int i=values.size()-1; i>=0; i--) {
392	result.insert(0, separator);
393	result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
394	}
395	}
396
397	}
398	current_id = relation_id;
399	relation_id = OID.getParent(current_id);
400	}
401	return result.toString();
402	}
403
404
405	/** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
406	protected String resolveTextMacros(String doc_content, String doc_id, String lang)
407	{
408	DBInfo info = null;
409	if (doc_content.indexOf("_httpdocimg_")!=-1) {
410	String top_doc_id = OID.getTop(doc_id);
411	info = this.gdbm_src.getInfo(top_doc_id);
412	if (info == null) {
413	// perhaps we had per.iods in the ids - just try the current id
414	top_doc_id = doc_id;
415	info = this.gdbm_src.getInfo(top_doc_id);
416	}
417	if (info != null) {
418	String archivedir = info.getInfo("archivedir");
419	String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
420
421	// Resolve all "_httpdocimg_"s
422	doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
423	}
424	}
425	// resolve any collection specific macros
426	doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
427	return doc_content;
428	}
429
430	protected Element getInfo(String doc_id, String info_type) {
431
432	String value="";
433	if (info_type.equals(INFO_NUM_SIBS)) {
434	String parent_id = OID.getParent(doc_id);
435	if (parent_id.equals(doc_id)) {
436	value="0";
437	} else {
438	value = String.valueOf(getNumChildren(parent_id));
439	}
440	} else if (info_type.equals(INFO_NUM_CHILDREN)) {
441	value = String.valueOf(getNumChildren(doc_id));
442	} else if (info_type.equals(INFO_SIB_POS)) {
443	String parent_id = OID.getParent(doc_id);
444	if (parent_id.equals(doc_id)) {
445	value="-1";
446	} else {
447	DBInfo info = this.gdbm_src.getInfo(parent_id);
448	if (info==null) {
449	value ="-1";
450	} else {
451	String contains = info.getInfo("contains");
452	contains = contains.replaceAll("\"", parent_id);
453	String [] children = contains.split(";");
454	for (int i=0;i<children.length;i++) {
455	String child_id = children[i];
456	if (child_id.equals(doc_id)) {
457	value = String.valueOf(i+1); // make it from 1 to length
458	break;
459	}
460	}
461	}
462	}
463	} else {
464	return null;
465	}
466	Element info_elem = this.doc.createElement("info");
467	info_elem.setAttribute(GSXML.NAME_ATT, info_type);
468	info_elem.setAttribute(GSXML.VALUE_ATT, value);
469	return info_elem;
470	}
471
472	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: