Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 9000

Last change on this file since 9000 was 9000, checked in by kjdon, 19 years ago
added indexStem info into configure
Property svn:keywords set to `Author Date Id Revision`
File size: 14.2 KB

Line
1	/*
2	* AbstractGS2DocumentRetrieve.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gdbm.*;
23	import org.greenstone.gsdl3.util.GSXML;
24	import org.greenstone.gsdl3.util.GSFile;
25	import org.greenstone.gsdl3.util.OID;
26	import org.greenstone.gsdl3.util.MacroResolver;
27	import org.greenstone.gsdl3.util.GS2MacroResolver;
28	import org.greenstone.gsdl3.util.GSConstants;
29
30	// XML classes
31	import org.w3c.dom.Document;
32	import org.w3c.dom.Element;
33	import org.w3c.dom.NodeList;
34
35	// General Java classes
36	import java.io.File;
37	import java.util.StringTokenizer;
38	import java.util.Vector;
39	import java.util.Set;
40	import java.util.Iterator;
41	import java.util.ArrayList;
42
43	/** Implements the generic retrieval and classifier services for GS2
44	* collections.
45	*
46	* @author <a href="mailto:[email protected]">Katherine Don</a>
47	* @author <a href="mailto:[email protected]">Michael Dewsnip</a>
48	*/
49
50	public abstract class AbstractGS2DocumentRetrieve
51	extends AbstractDocumentRetrieve {
52
53	protected static final String INDEX_STEM_ELEM = "indexStem";
54
55	// protected static final String EXTLINK_PARAM = "ext"; here or in base??
56	protected String index_stem = null;
57
58	protected GDBMWrapper gdbm_src = null;
59
60
/**
 * Creates the GDBM wrapper used by this service and a GS2 macro resolver
 * that is handed that same wrapper instance.
 */
protected AbstractGS2DocumentRetrieve()
{
    GDBMWrapper wrapper = new GDBMWrapper();
    this.gdbm_src = wrapper;
    this.macro_resolver = new GS2MacroResolver(wrapper);
}
67
68
69	/** configure this service */
70	public boolean configure(Element info, Element extra_info)
71	{
72
73	System.out.println("Configuring AbstractGS2DocumentRetrieve...");
74	//this.config_info = info;
75
76	// Open GDBM database for querying
77	String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
78	if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
79	System.err.println("AbstractGS2DocumentRetrieve Error: Could not open GDBM database!");
80	return false;
81	}
82
83	// the index stem is either specified in the config file or is the collection name
84	Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
85	if (index_stem_elem != null) {
86	this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
87	}
88	if (this.index_stem == null \|\| this.index_stem.equals("")) {
89	System.err.println("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
90	this.index_stem = this.cluster_name;
91	}
92
93
94	return super.configure(info, extra_info);
95
96	}
97
98	/** if id ends in .fc, .pc etc, then translate it to the correct id */
99	protected String translateId(String node_id) {
100	return this.gdbm_src.translateOID(node_id);
101	}
102
103	/** if an id is not a greenstone id (an external id) then translate
104	it to a greenstone one*/
105	protected String translateExternalId(String node_id){
106	return this.gdbm_src.externalId2OID(node_id);
107	}
108
109	/** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
110	protected String getRootId(String node_id) {
111	return OID.getTop(node_id);
112	}
113	/** returns a list of the child ids in order, null if no children */
114	protected ArrayList getChildrenIds(String node_id) {
115	DBInfo info = this.gdbm_src.getInfo(node_id);
116	if (info == null) {
117	return null;
118	}
119
120	String contains = info.getInfo("contains");
121	if (contains.equals("")) {
122	return null;
123	}
124	ArrayList children = new ArrayList();
125	StringTokenizer st = new StringTokenizer(contains, ";");
126	while (st.hasMoreTokens()) {
127	String child_id = st.nextToken().replaceAll("\"", node_id);
128	children.add(child_id);
129	}
130	return children;
131
132	}
133	/** returns the node id of the parent node, null if no parent */
134	protected String getParentId(String node_id){
135	String parent = OID.getParent(node_id);
136	if (parent.equals(node_id)) {
137	return null;
138	}
139	return parent;
140	}
141
142	/** get the metadata for the classifier node node_id
143	* returns a metadataList element:
144	* <metadataList><metadata name="xxx">value</metadata></metadataList>
145	*/
146	// assumes only one value per metadata
147	protected Element getMetadataList(String node_id, boolean all_metadata,
148	ArrayList metadata_names) {
149	Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
150	DBInfo info = this.gdbm_src.getInfo(node_id);
151	if (info == null) {
152	return null;
153	}
154	String lang = "en"; // why do we need this??
155	if (all_metadata) {
156	// return everything out of the database
157	Set keys = info.getKeys();
158	Iterator it = keys.iterator();
159	while(it.hasNext()) {
160	String key = (String)it.next();
161	String value = info.getInfo(key);
162	GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(value, lang, MacroResolver.SCOPE_META, node_id));
163	}
164
165	} else {
166	for (int i=0; i<metadata_names.size(); i++) {
167	String meta_name = (String) metadata_names.get(i);
168	String value = getMetadata(node_id, info, meta_name, lang);
169	GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
170	}
171	}
172	return metadata_list;
173	}
174
175	/** returns the structural information asked for.
176	* info_type may be one of
177	* INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
178	*/
179	protected String getStructureInfo(String doc_id, String info_type) {
180	String value="";
181	if (info_type.equals(INFO_NUM_SIBS)) {
182	String parent_id = OID.getParent(doc_id);
183	if (parent_id.equals(doc_id)) {
184	value="0";
185	} else {
186	value = String.valueOf(getNumChildren(parent_id));
187	}
188	return value;
189	}
190
191	if (info_type.equals(INFO_NUM_CHILDREN)) {
192	return String.valueOf(getNumChildren(doc_id));
193	}
194
195
196	if (info_type.equals(INFO_SIB_POS)) {
197	String parent_id = OID.getParent(doc_id);
198	if (parent_id.equals(doc_id)) {
199	return "-1";
200	}
201
202	DBInfo info = this.gdbm_src.getInfo(parent_id);
203	if (info==null) {
204	return "-1";
205	}
206
207	String contains = info.getInfo("contains");
208	contains = contains.replaceAll("\"", parent_id);
209	String [] children = contains.split(";");
210	for (int i=0;i<children.length;i++) {
211	String child_id = children[i];
212	if (child_id.equals(doc_id)) {
213	return String.valueOf(i+1); // make it from 1 to length
214
215	}
216	}
217
218	return "-1";
219	} else {
220	return null;
221	}
222
223	}
224
225	protected int getNumChildren(String node_id) {
226	DBInfo info = this.gdbm_src.getInfo(node_id);
227	if (info == null) {
228	return 0;
229	}
230	String contains = info.getInfo("contains");
231	if (contains.equals("")) {
232	return 0;
233	}
234	String [] children = contains.split(";");
235	return children.length;
236	}
237
238	/** returns the document type of the doc that the specified node
239	belongs to. should be one of
240	GSXML.DOC_TYPE_SIMPLE,
241	GSXML.DOC_TYPE_PAGED,
242	GSXML.DOC_TYPE_HIERARCHY
243	*/
244	protected String getDocType(String node_id) {
245	DBInfo info = this.gdbm_src.getInfo(node_id);
246	if (info == null) {
247	return GSXML.DOC_TYPE_SIMPLE;
248	}
249	String doc_type = info.getInfo("doctype");
250	if (!doc_type.equals("")&&!doc_type.equals("doc")) {
251	return doc_type;
252	}
253
254	String top_id = OID.getTop(node_id);
255	boolean is_top = (top_id.equals(node_id) ? true : false);
256
257	String children = info.getInfo("contains");
258	boolean is_leaf = (children.equals("") ? true : false);
259
260	if (is_top && is_leaf) { // a single section document
261	return GSXML.DOC_TYPE_SIMPLE;
262	}
263
264	// now we just check the top node
265	if (!is_top) { // we need to look at the top info
266	info = this.gdbm_src.getInfo(top_id);
267	}
268	if (info == null) {
269	return GSXML.DOC_TYPE_HIERARCHY;
270	}
271
272	String childtype = info.getInfo("childtype");
273	if (childtype.equals("Paged")) {
274	return GSXML.DOC_TYPE_PAGED;
275	}
276	return GSXML.DOC_TYPE_HIERARCHY;
277	}
278
279	/** returns the content of a node
280	* should return a nodeContent element:
281	* <nodeContent>text content or other elements</nodeContent>
282	*/
283	abstract protected Element getNodeContent(String doc_id);
284
285	protected String getMetadata(String node_id, DBInfo info,
286	String metadata, String lang) {
287	boolean multiple = false;
288	String relation = "";
289	String separator = ", ";
290	int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
291	if (pos ==-1) {
292	// just a plain meta entry eg dc.Title
293	return macro_resolver.resolve((String)info.getInfo(metadata), lang, MacroResolver.SCOPE_META, node_id);
294	}
295
296	String temp = metadata.substring(0, pos);
297	metadata = metadata.substring(pos+1);
298	// check for all on the front
299	if (temp.equals("all")) {
300	multiple=true;
301	pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
302	if (pos ==-1) {
303	temp = "";
304	} else {
305	temp = metadata.substring(0, pos);
306	metadata = metadata.substring(pos+1);
307	}
308	}
309
310	// now check for relational info
311	if (temp.equals("parent") \|\| temp.equals("root") \|\| temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
312	relation = temp;
313	pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
314	if (pos == -1) {
315	temp = "";
316	} else {
317	temp = metadata.substring(0, pos);
318	metadata = metadata.substring(pos+1);
319	}
320	}
321
322	// now look for separator info
323	if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
324	separator = temp.substring(1, temp.length()-1);
325
326	}
327
328	String relation_id = node_id;
329	if (relation.equals("parent") \|\| relation.equals("ancestors")) {
330	relation_id = OID.getParent(node_id);
331	// parent or ancestor does not include self
332	if (relation_id.equals(node_id)){
333	return "";
334	}
335	} else if (relation.equals("root")) {
336	relation_id = OID.getTop(node_id);
337	}
338
339	// now we either have a single node, or we have ancestors
340	DBInfo relation_info;
341	if (relation_id.equals(node_id)) {
342	relation_info = info;
343	} else {
344	relation_info = this.gdbm_src.getInfo(relation_id);
345	}
346	if (relation_info == null) {
347	return "";
348	}
349
350	StringBuffer result = new StringBuffer();
351
352	if (!multiple) {
353	result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
354	} else {
355	// we have multiple meta
356	Vector values = relation_info.getMultiInfo(metadata);
357	if (values != null) {
358	boolean first = true;
359	for (int i=0; i<values.size(); i++) {
360	if (first) {
361	first = false;
362	} else {
363	result.append(separator);
364	}
365	result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
366	}
367	}
368	}
369	// if not ancestors, then this is all we do
370	if (!relation.equals("ancestors")) {
371	return result.toString();
372	}
373
374	// now do the ancestors
375	String current_id = relation_id;
376	relation_id = OID.getParent(current_id);
377	while (!relation_id.equals(current_id)) {
378	relation_info = this.gdbm_src.getInfo(relation_id);
379	if (relation_info == null) return result.toString();
380	if (!multiple) {
381	result.insert(0, separator);
382	result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
383	} else {
384	Vector values = relation_info.getMultiInfo(metadata);
385	if (values != null) {
386	for (int i=values.size()-1; i>=0; i--) {
387	result.insert(0, separator);
388	result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
389	}
390	}
391
392	}
393	current_id = relation_id;
394	relation_id = OID.getParent(current_id);
395	}
396	return result.toString();
397	}
398
399
400	/** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
401	protected String resolveTextMacros(String doc_content, String doc_id, String lang)
402	{
403	DBInfo info = null;
404	if (doc_content.indexOf("_httpdocimg_")!=-1) {
405	String top_doc_id = OID.getTop(doc_id);
406	info = this.gdbm_src.getInfo(top_doc_id);
407	if (info == null) {
408	// perhaps we had per.iods in the ids - just try the current id
409	top_doc_id = doc_id;
410	info = this.gdbm_src.getInfo(top_doc_id);
411	}
412	if (info != null) {
413	String archivedir = info.getInfo("archivedir");
414	String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
415
416	// Resolve all "_httpdocimg_"s
417	doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
418	}
419	}
420	// resolve any collection specific macros
421	doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
422	return doc_content;
423	}
424
425	protected Element getInfo(String doc_id, String info_type) {
426
427	String value="";
428	if (info_type.equals(INFO_NUM_SIBS)) {
429	String parent_id = OID.getParent(doc_id);
430	if (parent_id.equals(doc_id)) {
431	value="0";
432	} else {
433	value = String.valueOf(getNumChildren(parent_id));
434	}
435	} else if (info_type.equals(INFO_NUM_CHILDREN)) {
436	value = String.valueOf(getNumChildren(doc_id));
437	} else if (info_type.equals(INFO_SIB_POS)) {
438	String parent_id = OID.getParent(doc_id);
439	if (parent_id.equals(doc_id)) {
440	value="-1";
441	} else {
442	DBInfo info = this.gdbm_src.getInfo(parent_id);
443	if (info==null) {
444	value ="-1";
445	} else {
446	String contains = info.getInfo("contains");
447	contains = contains.replaceAll("\"", parent_id);
448	String [] children = contains.split(";");
449	for (int i=0;i<children.length;i++) {
450	String child_id = children[i];
451	if (child_id.equals(doc_id)) {
452	value = String.valueOf(i+1); // make it from 1 to length
453	break;
454	}
455	}
456	}
457	}
458	} else {
459	return null;
460	}
461	Element info_elem = this.doc.createElement("info");
462	info_elem.setAttribute(GSXML.NAME_ATT, info_type);
463	info_elem.setAttribute(GSXML.VALUE_ATT, value);
464	return info_elem;
465	}
466
467	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: