Context Navigation

source: branches/ant-install-branch/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 9529

Last change on this file since 9529 was 9529, checked in by kjdon, 19 years ago
changed the location of the GDBMWrapper and DBInfo classes, so had to change some of the import statements
Property svn:keywords set to `Author Date Id Revision`
File size: 14.3 KB

Line
1	/*
2	* AbstractGS2DocumentRetrieve.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.util.GSXML;
23	import org.greenstone.gsdl3.util.GSFile;
24	import org.greenstone.gsdl3.util.OID;
25	import org.greenstone.gsdl3.util.MacroResolver;
26	import org.greenstone.gsdl3.util.GS2MacroResolver;
27	import org.greenstone.gsdl3.util.GSConstants;
28	import org.greenstone.gsdl3.util.GDBMWrapper;
29	import org.greenstone.gsdl3.util.DBInfo;
30	// XML classes
31	import org.w3c.dom.Document;
32	import org.w3c.dom.Element;
33	import org.w3c.dom.NodeList;
34
35	// General Java classes
36	import java.io.File;
37	import java.util.StringTokenizer;
38	import java.util.Vector;
39	import java.util.Set;
40	import java.util.Iterator;
41	import java.util.ArrayList;
42
43	/** Implements the generic retrieval and classifier services for GS2
44	* collections.
45	*
46	* @author <a href="mailto:[email protected]">Katherine Don</a>
47	* @author <a href="mailto:[email protected]">Michael Dewsnip</a>
48	*/
49
50	public abstract class AbstractGS2DocumentRetrieve
51	extends AbstractDocumentRetrieve {
52
/** Tag name of the configuration element whose name attribute supplies the index stem. */
protected static final String INDEX_STEM_ELEM = "indexStem";

// protected static final String EXTLINK_PARAM = "ext"; here or in base??
/** Index stem for this collection; configure() sets it from the config, falling back to the cluster name. */
protected String index_stem = null;

/** Wrapper around the collection's GDBM database; created in the constructor and opened (GDBMWrapper.READER) in configure(). */
protected GDBMWrapper gdbm_src = null;
59
60
/**
 * Creates the GDBM wrapper and installs a GS2 macro resolver that is
 * backed by that same wrapper. The database is not opened here; that
 * happens in configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    GDBMWrapper wrapper = new GDBMWrapper();
    this.gdbm_src = wrapper;
    this.macro_resolver = new GS2MacroResolver(wrapper);
}
67
68
69	/** configure this service */
70	public boolean configure(Element info, Element extra_info)
71	{
72
73	System.out.println("Configuring AbstractGS2DocumentRetrieve...");
74	//this.config_info = info;
75
76	// Open GDBM database for querying
77	String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
78	if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
79	System.err.println("AbstractGS2DocumentRetrieve Error: Could not open GDBM database!");
80	return false;
81	}
82
83	// the index stem is either specified in the config file or is the collection name
84	Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
85	if (index_stem_elem != null) {
86	this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
87	}
88	if (this.index_stem == null \|\| this.index_stem.equals("")) {
89	System.err.println("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
90	this.index_stem = this.cluster_name;
91	}
92
93
94	return super.configure(info, extra_info);
95
96	}
97
98	/** if id ends in .fc, .pc etc, then translate it to the correct id */
99	protected String translateId(String node_id) {
100	return this.gdbm_src.translateOID(node_id);
101	}
102
103	/** if an id is not a greenstone id (an external id) then translate
104	it to a greenstone one*/
105	protected String translateExternalId(String node_id){
106	return this.gdbm_src.externalId2OID(node_id);
107	}
108
109	/** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
110	protected String getRootId(String node_id) {
111	return OID.getTop(node_id);
112	}
113	/** returns a list of the child ids in order, null if no children */
114	protected ArrayList getChildrenIds(String node_id) {
115	DBInfo info = this.gdbm_src.getInfo(node_id);
116	if (info == null) {
117	return null;
118	}
119
120	String contains = info.getInfo("contains");
121	if (contains.equals("")) {
122	return null;
123	}
124	ArrayList children = new ArrayList();
125	StringTokenizer st = new StringTokenizer(contains, ";");
126	while (st.hasMoreTokens()) {
127	String child_id = st.nextToken().replaceAll("\"", node_id);
128	children.add(child_id);
129	}
130	return children;
131
132	}
133	/** returns the node id of the parent node, null if no parent */
134	protected String getParentId(String node_id){
135	String parent = OID.getParent(node_id);
136	if (parent.equals(node_id)) {
137	return null;
138	}
139	return parent;
140	}
141
142	/** get the metadata for the classifier node node_id
143	* returns a metadataList element:
144	* <metadataList><metadata name="xxx">value</metadata></metadataList>
145	*/
146	// assumes only one value per metadata
147	protected Element getMetadataList(String node_id, boolean all_metadata,
148	ArrayList metadata_names) {
149	Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
150	DBInfo info = this.gdbm_src.getInfo(node_id);
151	if (info == null) {
152	return null;
153	}
154	String lang = "en"; // why do we need this??
155	if (all_metadata) {
156	// return everything out of the database
157	Set keys = info.getKeys();
158	Iterator it = keys.iterator();
159	while(it.hasNext()) {
160	String key = (String)it.next();
161	String value = info.getInfo(key);
162	GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(value, lang, MacroResolver.SCOPE_META, node_id));
163	}
164
165	} else {
166	for (int i=0; i<metadata_names.size(); i++) {
167	String meta_name = (String) metadata_names.get(i);
168	String value = getMetadata(node_id, info, meta_name, lang);
169	GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
170	}
171	}
172	return metadata_list;
173	}
174
175	/** returns the structural information asked for.
176	* info_type may be one of
177	* INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
178	*/
179	protected String getStructureInfo(String doc_id, String info_type) {
180	String value="";
181	if (info_type.equals(INFO_NUM_SIBS)) {
182	String parent_id = OID.getParent(doc_id);
183	if (parent_id.equals(doc_id)) {
184	value="0";
185	} else {
186	value = String.valueOf(getNumChildren(parent_id));
187	}
188	return value;
189	}
190
191	if (info_type.equals(INFO_NUM_CHILDREN)) {
192	return String.valueOf(getNumChildren(doc_id));
193	}
194
195
196	if (info_type.equals(INFO_SIB_POS)) {
197	String parent_id = OID.getParent(doc_id);
198	if (parent_id.equals(doc_id)) {
199	return "-1";
200	}
201
202	DBInfo info = this.gdbm_src.getInfo(parent_id);
203	if (info==null) {
204	return "-1";
205	}
206
207	String contains = info.getInfo("contains");
208	contains = contains.replaceAll("\"", parent_id);
209	String [] children = contains.split(";");
210	for (int i=0;i<children.length;i++) {
211	String child_id = children[i];
212	if (child_id.equals(doc_id)) {
213	return String.valueOf(i+1); // make it from 1 to length
214
215	}
216	}
217
218	return "-1";
219	} else {
220	return null;
221	}
222
223	}
224
225	protected int getNumChildren(String node_id) {
226	DBInfo info = this.gdbm_src.getInfo(node_id);
227	if (info == null) {
228	return 0;
229	}
230	String contains = info.getInfo("contains");
231	if (contains.equals("")) {
232	return 0;
233	}
234	String [] children = contains.split(";");
235	return children.length;
236	}
237
238	/** returns the document type of the doc that the specified node
239	belongs to. should be one of
240	GSXML.DOC_TYPE_SIMPLE,
241	GSXML.DOC_TYPE_PAGED,
242	GSXML.DOC_TYPE_HIERARCHY
243	*/
244	protected String getDocType(String node_id) {
245	DBInfo info = this.gdbm_src.getInfo(node_id);
246	if (info == null) {
247	return GSXML.DOC_TYPE_SIMPLE;
248	}
249	String doc_type = info.getInfo("doctype");
250	if (!doc_type.equals("")&&!doc_type.equals("doc")) {
251	return doc_type;
252	}
253
254	String top_id = OID.getTop(node_id);
255	boolean is_top = (top_id.equals(node_id) ? true : false);
256
257	String children = info.getInfo("contains");
258	boolean is_leaf = (children.equals("") ? true : false);
259
260	if (is_top && is_leaf) { // a single section document
261	return GSXML.DOC_TYPE_SIMPLE;
262	}
263
264	// now we just check the top node
265	if (!is_top) { // we need to look at the top info
266	info = this.gdbm_src.getInfo(top_id);
267	}
268	if (info == null) {
269	return GSXML.DOC_TYPE_HIERARCHY;
270	}
271
272	String childtype = info.getInfo("childtype");
273	if (childtype.equals("Paged")) {
274	return GSXML.DOC_TYPE_PAGED;
275	}
276	return GSXML.DOC_TYPE_HIERARCHY;
277	}
278
/** returns the content of a node
 * should return a nodeContent element:
 * <nodeContent>text content or other elements</nodeContent>
 *
 * Left abstract here: each concrete subclass supplies its own way of
 * fetching the content.
 *
 * @param doc_id the id of the node whose content is wanted
 * @return the nodeContent element for that node
 */
abstract protected Element getNodeContent(String doc_id);
284
285	protected String getMetadata(String node_id, DBInfo info,
286	String metadata, String lang) {
287	boolean multiple = false;
288	String relation = "";
289	String separator = ", ";
290	int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
291	if (pos ==-1) {
292	// just a plain meta entry eg dc.Title
293	return macro_resolver.resolve((String)info.getInfo(metadata), lang, MacroResolver.SCOPE_META, node_id);
294	}
295
296	String temp = metadata.substring(0, pos);
297	metadata = metadata.substring(pos+1);
298	// check for all on the front
299	if (temp.equals("all")) {
300	multiple=true;
301	pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
302	if (pos ==-1) {
303	temp = "";
304	} else {
305	temp = metadata.substring(0, pos);
306	metadata = metadata.substring(pos+1);
307	}
308	}
309
310	// now check for relational info
311	if (temp.equals("parent") \|\| temp.equals("root") \|\| temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
312	relation = temp;
313	pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
314	if (pos == -1) {
315	temp = "";
316	} else {
317	temp = metadata.substring(0, pos);
318	metadata = metadata.substring(pos+1);
319	}
320	}
321
322	// now look for separator info
323	if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
324	separator = temp.substring(1, temp.length()-1);
325
326	}
327
328	String relation_id = node_id;
329	if (relation.equals("parent") \|\| relation.equals("ancestors")) {
330	relation_id = OID.getParent(node_id);
331	// parent or ancestor does not include self
332	if (relation_id.equals(node_id)){
333	return "";
334	}
335	} else if (relation.equals("root")) {
336	relation_id = OID.getTop(node_id);
337	}
338
339	// now we either have a single node, or we have ancestors
340	DBInfo relation_info;
341	if (relation_id.equals(node_id)) {
342	relation_info = info;
343	} else {
344	relation_info = this.gdbm_src.getInfo(relation_id);
345	}
346	if (relation_info == null) {
347	return "";
348	}
349
350	StringBuffer result = new StringBuffer();
351
352	if (!multiple) {
353	result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
354	} else {
355	// we have multiple meta
356	Vector values = relation_info.getMultiInfo(metadata);
357	if (values != null) {
358	boolean first = true;
359	for (int i=0; i<values.size(); i++) {
360	if (first) {
361	first = false;
362	} else {
363	result.append(separator);
364	}
365	result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
366	}
367	}
368	}
369	// if not ancestors, then this is all we do
370	if (!relation.equals("ancestors")) {
371	return result.toString();
372	}
373
374	// now do the ancestors
375	String current_id = relation_id;
376	relation_id = OID.getParent(current_id);
377	while (!relation_id.equals(current_id)) {
378	relation_info = this.gdbm_src.getInfo(relation_id);
379	if (relation_info == null) return result.toString();
380	if (!multiple) {
381	result.insert(0, separator);
382	result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
383	} else {
384	Vector values = relation_info.getMultiInfo(metadata);
385	if (values != null) {
386	for (int i=values.size()-1; i>=0; i--) {
387	result.insert(0, separator);
388	result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
389	}
390	}
391
392	}
393	current_id = relation_id;
394	relation_id = OID.getParent(current_id);
395	}
396	return result.toString();
397	}
398
399
400	/** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
401	protected String resolveTextMacros(String doc_content, String doc_id, String lang)
402	{
403	DBInfo info = null;
404	if (doc_content.indexOf("_httpdocimg_")!=-1) {
405	String top_doc_id = OID.getTop(doc_id);
406	info = this.gdbm_src.getInfo(top_doc_id);
407	if (info == null) {
408	// perhaps we had per.iods in the ids - just try the current id
409	top_doc_id = doc_id;
410	info = this.gdbm_src.getInfo(top_doc_id);
411	}
412	if (info != null) {
413	String archivedir = info.getInfo("archivedir");
414	String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
415
416	// Resolve all "_httpdocimg_"s
417	doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
418	}
419	}
420	// resolve any collection specific macros
421	doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
422	return doc_content;
423	}
424
425	protected Element getInfo(String doc_id, String info_type) {
426
427	String value="";
428	if (info_type.equals(INFO_NUM_SIBS)) {
429	String parent_id = OID.getParent(doc_id);
430	if (parent_id.equals(doc_id)) {
431	value="0";
432	} else {
433	value = String.valueOf(getNumChildren(parent_id));
434	}
435	} else if (info_type.equals(INFO_NUM_CHILDREN)) {
436	value = String.valueOf(getNumChildren(doc_id));
437	} else if (info_type.equals(INFO_SIB_POS)) {
438	String parent_id = OID.getParent(doc_id);
439	if (parent_id.equals(doc_id)) {
440	value="-1";
441	} else {
442	DBInfo info = this.gdbm_src.getInfo(parent_id);
443	if (info==null) {
444	value ="-1";
445	} else {
446	String contains = info.getInfo("contains");
447	contains = contains.replaceAll("\"", parent_id);
448	String [] children = contains.split(";");
449	for (int i=0;i<children.length;i++) {
450	String child_id = children[i];
451	if (child_id.equals(doc_id)) {
452	value = String.valueOf(i+1); // make it from 1 to length
453	break;
454	}
455	}
456	}
457	}
458	} else {
459	return null;
460	}
461	Element info_elem = this.doc.createElement("info");
462	info_elem.setAttribute(GSXML.NAME_ATT, info_type);
463	info_elem.setAttribute(GSXML.VALUE_ATT, value);
464	return info_elem;
465	}
466
467	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: