source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 15326

Last change on this file since 15326 was 15326, checked in by kjdon, 16 years ago

Added support for JDBM (or another database) in place of GDBM: SimpleCollectionDatabase is now used instead of GDBMWrapper. There is a new element in the buildConfig file, databaseType, which is set to gdbm or jdbm; if it is not present, gdbm is assumed. This change may also include small style changes to some files.

  • Property svn:keywords set to Author Date Id Revision
File size: 15.3 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46/** Implements the generic retrieval and classifier services for GS2
47 * collections.
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
51 */
52
53public abstract class AbstractGS2DocumentRetrieve
54 extends AbstractDocumentRetrieve {
55
/** Logger for this class, named after the fully qualified class name. */
static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());

// protected static final String EXTLINK_PARAM = "ext"; here or in base??
/** Index stem used to locate the collection database file; set in configure(),
 *  where it falls back to the cluster name if the config does not supply one. */
protected String index_stem = null;

/** The collection database; created and opened in READ mode by configure()
 *  and closed by cleanUp(). It is null until configure() has created it. */
protected SimpleCollectionDatabase coll_db = null;
62
63
/** Creates an unconfigured service; the fields of this class are set up
 *  later by configure(). */
protected AbstractGS2DocumentRetrieve()
{
    super();
}
68
69 public void cleanUp() {
70 super.cleanUp();
71 this.coll_db.closeDatabase();
72 }
73 /** configure this service */
74 public boolean configure(Element info, Element extra_info)
75 {
76 if (!super.configure(info, extra_info)){
77 return false;
78 }
79
80 logger.info("Configuring AbstractGS2DocumentRetrieve...");
81 //this.config_info = info;
82
83 // the index stem is either specified in the config file or is the collection name
84 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
85 if (index_stem_elem != null) {
86 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
87 }
88 if (this.index_stem == null || this.index_stem.equals("")) {
89 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
90 this.index_stem = this.cluster_name;
91 }
92
93 // find out what kind of database we have
94 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
95 String database_type = null;
96 if (database_type_elem != null) {
97 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
98 }
99 if (database_type == null || database_type.equals("")) {
100 database_type = "gdbm"; // the default
101 }
102 coll_db = new SimpleCollectionDatabase(database_type);
103 if (coll_db == null) {
104 logger.error("Couldn't create the collection database of type "+database_type);
105 return false;
106 }
107
108 // Open database for querying
109 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
110 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
111 logger.error("Could not open collection database!");
112 return false;
113 }
114 this.macro_resolver = new GS2MacroResolver(this.coll_db);
115
116 return true;
117 }
118
119 /** if id ends in .fc, .pc etc, then translate it to the correct id */
120 protected String translateId(String node_id) {
121 return this.coll_db.translateOID(node_id);
122 }
123
124 /** if an id is not a greenstone id (an external id) then translate
125 it to a greenstone one*/
126 protected String translateExternalId(String node_id){
127 return this.coll_db.externalId2OID(node_id);
128 }
129
130 /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
131 protected String getRootId(String node_id) {
132 return OID.getTop(node_id);
133 }
134 /** returns a list of the child ids in order, null if no children */
135 protected ArrayList getChildrenIds(String node_id) {
136 DBInfo info = this.coll_db.getInfo(node_id);
137 if (info == null) {
138 return null;
139 }
140
141 String contains = info.getInfo("contains");
142 if (contains.equals("")) {
143 return null;
144 }
145 ArrayList children = new ArrayList();
146 StringTokenizer st = new StringTokenizer(contains, ";");
147 while (st.hasMoreTokens()) {
148 String child_id = st.nextToken().replaceAll("\"", node_id);
149 children.add(child_id);
150 }
151 return children;
152
153 }
154 /** returns the node id of the parent node, null if no parent */
155 protected String getParentId(String node_id){
156 String parent = OID.getParent(node_id);
157 if (parent.equals(node_id)) {
158 return null;
159 }
160 return parent;
161 }
162
163 /** get the metadata for the classifier node node_id
164 * returns a metadataList element:
165 * <metadataList><metadata name="xxx">value</metadata></metadataList>
166 */
167 // assumes only one value per metadata
168 protected Element getMetadataList(String node_id, boolean all_metadata,
169 ArrayList metadata_names)
170 throws GSException {
171 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
172 DBInfo info = this.coll_db.getInfo(node_id);
173 if (info == null) {
174 return null;
175 }
176 String lang = "en"; // why do we need this??
177 if (all_metadata) {
178 // return everything out of the database
179 Set keys = info.getKeys();
180 Iterator it = keys.iterator();
181 while(it.hasNext()) {
182 String key = (String)it.next();
183 //String value = info.getInfo(key);
184 Vector values = info.getMultiInfo(key);
185 for(int i=0; i<values.size(); i++) {
186 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
187 }
188 }
189
190 } else {
191 for (int i=0; i<metadata_names.size(); i++) {
192 String meta_name = (String) metadata_names.get(i);
193 String value = getMetadata(node_id, info, meta_name, lang);
194 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
195 }
196 }
197 return metadata_list;
198 }
199
200 /** returns the structural information asked for.
201 * info_type may be one of
202 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
203 */
204 protected String getStructureInfo(String doc_id, String info_type) {
205 String value="";
206 if (info_type.equals(INFO_NUM_SIBS)) {
207 String parent_id = OID.getParent(doc_id);
208 if (parent_id.equals(doc_id)) {
209 value="0";
210 } else {
211 value = String.valueOf(getNumChildren(parent_id));
212 }
213 return value;
214 }
215
216 if (info_type.equals(INFO_NUM_CHILDREN)) {
217 return String.valueOf(getNumChildren(doc_id));
218 }
219
220
221 if (info_type.equals(INFO_SIB_POS)) {
222 String parent_id = OID.getParent(doc_id);
223 if (parent_id.equals(doc_id)) {
224 return "-1";
225 }
226
227 DBInfo info = this.coll_db.getInfo(parent_id);
228 if (info==null) {
229 return "-1";
230 }
231
232 String contains = info.getInfo("contains");
233 contains = contains.replaceAll("\"", parent_id);
234 String [] children = contains.split(";");
235 for (int i=0;i<children.length;i++) {
236 String child_id = children[i];
237 if (child_id.equals(doc_id)) {
238 return String.valueOf(i+1); // make it from 1 to length
239
240 }
241 }
242
243 return "-1";
244 } else {
245 return null;
246 }
247
248 }
249
250 protected int getNumChildren(String node_id) {
251 DBInfo info = this.coll_db.getInfo(node_id);
252 if (info == null) {
253 return 0;
254 }
255 String contains = info.getInfo("contains");
256 if (contains.equals("")) {
257 return 0;
258 }
259 String [] children = contains.split(";");
260 return children.length;
261 }
262
263 /** returns the document type of the doc that the specified node
264 belongs to. should be one of
265 GSXML.DOC_TYPE_SIMPLE,
266 GSXML.DOC_TYPE_PAGED,
267 GSXML.DOC_TYPE_HIERARCHY
268 */
269 protected String getDocType(String node_id) {
270 DBInfo info = this.coll_db.getInfo(node_id);
271 if (info == null) {
272 return GSXML.DOC_TYPE_SIMPLE;
273 }
274 String doc_type = info.getInfo("doctype");
275 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
276 return doc_type;
277 }
278
279 String top_id = OID.getTop(node_id);
280 boolean is_top = (top_id.equals(node_id) ? true : false);
281
282 String children = info.getInfo("contains");
283 boolean is_leaf = (children.equals("") ? true : false);
284
285 if (is_top && is_leaf) { // a single section document
286 return GSXML.DOC_TYPE_SIMPLE;
287 }
288
289 // now we just check the top node
290 if (!is_top) { // we need to look at the top info
291 info = this.coll_db.getInfo(top_id);
292 }
293 if (info == null) {
294 return GSXML.DOC_TYPE_HIERARCHY;
295 }
296
297 String childtype = info.getInfo("childtype");
298 if (childtype.equals("Paged")) {
299 return GSXML.DOC_TYPE_PAGED;
300 }
301 return GSXML.DOC_TYPE_HIERARCHY;
302 }
303
304 /** returns the content of a node
305 * should return a nodeContent element:
306 * <nodeContent>text content or other elements</nodeContent>
307 */
308 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
309
310 protected String getMetadata(String node_id, DBInfo info,
311 String metadata, String lang) {
312 boolean multiple = false;
313 String relation = "";
314 String separator = ", ";
315 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
316 if (pos ==-1) {
317 Vector values = info.getMultiInfo(metadata);
318 if (values !=null){
319 // just a plain meta entry eg dc.Title
320 StringBuffer result = new StringBuffer();
321 boolean first = true;
322 for (int i=0; i<values.size(); i++) {
323 if (first) {
324 first = false;
325 } else {
326 result.append(separator);
327 }
328 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
329 }
330 return result.toString();
331 }
332 else{
333 String result = info.getInfo(metadata);
334 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
335 }
336 }
337
338 String temp = metadata.substring(0, pos);
339 metadata = metadata.substring(pos+1);
340 // check for all on the front
341 if (temp.equals("all")) {
342 multiple=true;
343 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
344 if (pos ==-1) {
345 temp = "";
346 } else {
347 temp = metadata.substring(0, pos);
348 metadata = metadata.substring(pos+1);
349 }
350 }
351
352 // now check for relational info
353 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendants"
354 relation = temp;
355 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
356 if (pos == -1) {
357 temp = "";
358 } else {
359 temp = metadata.substring(0, pos);
360 metadata = metadata.substring(pos+1);
361 }
362 }
363
364 // now look for separator info
365 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
366 separator = temp.substring(1, temp.length()-1);
367
368 }
369
370 String relation_id = node_id;
371 if (relation.equals("parent") || relation.equals("ancestors")) {
372 relation_id = OID.getParent(node_id);
373 // parent or ancestor does not include self
374 if (relation_id.equals(node_id)){
375 return "";
376 }
377 } else if (relation.equals("root")) {
378 relation_id = OID.getTop(node_id);
379 }
380
381 // now we either have a single node, or we have ancestors
382 DBInfo relation_info;
383 if (relation_id.equals(node_id)) {
384 relation_info = info;
385 } else {
386 relation_info = this.coll_db.getInfo(relation_id);
387 }
388 if (relation_info == null) {
389 return "";
390 }
391
392 StringBuffer result = new StringBuffer();
393
394 if (!multiple) {
395 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
396 } else {
397 // we have multiple meta
398 Vector values = relation_info.getMultiInfo(metadata);
399 if (values != null) {
400 boolean first = true;
401 for (int i=0; i<values.size(); i++) {
402 if (first) {
403 first = false;
404 } else {
405 result.append(separator);
406 }
407 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
408 }
409 }
410 logger.info(result);
411 }
412 // if not ancestors, then this is all we do
413 if (!relation.equals("ancestors")) {
414 return result.toString();
415 }
416
417 // now do the ancestors
418 String current_id = relation_id;
419 relation_id = OID.getParent(current_id);
420 while (!relation_id.equals(current_id)) {
421 relation_info = this.coll_db.getInfo(relation_id);
422 if (relation_info == null) return result.toString();
423 if (!multiple) {
424 result.insert(0, separator);
425 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
426 } else {
427 Vector values = relation_info.getMultiInfo(metadata);
428 if (values != null) {
429 for (int i=values.size()-1; i>=0; i--) {
430 result.insert(0, separator);
431 result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
432 }
433 }
434
435 }
436 current_id = relation_id;
437 relation_id = OID.getParent(current_id);
438 }
439 return result.toString();
440 }
441
442
443 /** needs to get info from collection database - if the calling code gets it already it may pay to pass it in instead */
444 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
445 {
446 // resolve any collection specific macros
447 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
448 return doc_content;
449 }
450
451 protected Element getInfo(String doc_id, String info_type) {
452
453 String value="";
454 if (info_type.equals(INFO_NUM_SIBS)) {
455 String parent_id = OID.getParent(doc_id);
456 if (parent_id.equals(doc_id)) {
457 value="0";
458 } else {
459 value = String.valueOf(getNumChildren(parent_id));
460 }
461 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
462 value = String.valueOf(getNumChildren(doc_id));
463 } else if (info_type.equals(INFO_SIB_POS)) {
464 String parent_id = OID.getParent(doc_id);
465 if (parent_id.equals(doc_id)) {
466 value="-1";
467 } else {
468 DBInfo info = this.coll_db.getInfo(parent_id);
469 if (info==null) {
470 value ="-1";
471 } else {
472 String contains = info.getInfo("contains");
473 contains = contains.replaceAll("\"", parent_id);
474 String [] children = contains.split(";");
475 for (int i=0;i<children.length;i++) {
476 String child_id = children[i];
477 if (child_id.equals(doc_id)) {
478 value = String.valueOf(i+1); // make it from 1 to length
479 break;
480 }
481 }
482 }
483 }
484 } else {
485 return null;
486 }
487 Element info_elem = this.doc.createElement("info");
488 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
489 info_elem.setAttribute(GSXML.VALUE_ATT, value);
490 return info_elem;
491 }
492
493 protected String getHrefOID(String href_url){
494 return this.coll_db.docnum2OID(href_url);
495 }
496
497}
Note: See TracBrowser for help on using the repository browser.