source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 22974

Last change on this file since 22974 was 22974, checked in by davidb, 14 years ago

The code used to test 'coll_db == null' to determine whether a database was opened correctly. Since this value is returned by a constructor, it is always non-null, even when it failed to open the database. The routine databaseOK() was added to the core class, and is now used in these routines instead of testing for null.

  • Property svn:keywords set to Author Date Id Revision
File size: 15.5 KB
RevLine 
[8959]1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
[9874]22import org.greenstone.gsdl3.core.GSException;
[8959]23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
[15326]29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
[9874]30import org.greenstone.gsdl3.util.DBInfo;
[8959]31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
[13124]44import org.apache.log4j.*;
45
[8959]46/** Implements the generic retrieval and classifier services for GS2
47 * collections.
48 *
[21663]49 * @author Katherine Don
50 * @author Michael Dewsnip
[8959]51 */
52
53public abstract class AbstractGS2DocumentRetrieve
54 extends AbstractDocumentRetrieve {
55
[13270]56 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
[13124]57
[9000]58 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
59 protected String index_stem = null;
60
[15326]61 protected SimpleCollectionDatabase coll_db = null;
[8959]62
63
/**
 * Creates the service and installs a GS2 macro resolver as this
 * service's macro resolver.
 */
protected AbstractGS2DocumentRetrieve()
{
    GS2MacroResolver gs2_resolver = new GS2MacroResolver();
    this.macro_resolver = gs2_resolver;
}
69
[9874]70 public void cleanUp() {
[15326]71 super.cleanUp();
72 this.coll_db.closeDatabase();
[9874]73 }
[8959]74 /** configure this service */
75 public boolean configure(Element info, Element extra_info)
76 {
[10093]77 if (!super.configure(info, extra_info)){
78 return false;
79 }
[11265]80
[13124]81 logger.info("Configuring AbstractGS2DocumentRetrieve...");
[8959]82 //this.config_info = info;
[9000]83
84 // the index stem is either specified in the config file or is the collection name
[10651]85 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
[9000]86 if (index_stem_elem != null) {
87 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
88 }
89 if (this.index_stem == null || this.index_stem.equals("")) {
[13124]90 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
[9000]91 this.index_stem = this.cluster_name;
92 }
[10651]93
[15326]94 // find out what kind of database we have
95 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
96 String database_type = null;
97 if (database_type_elem != null) {
98 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
99 }
100 if (database_type == null || database_type.equals("")) {
101 database_type = "gdbm"; // the default
102 }
103 coll_db = new SimpleCollectionDatabase(database_type);
[22974]104 if (!coll_db.databaseOK()) {
[15326]105 logger.error("Couldn't create the collection database of type "+database_type);
106 return false;
107 }
108
109 // Open database for querying
110 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
111 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
112 logger.error("Could not open collection database!");
[10651]113 return false;
114 }
115
[15770]116 // we need to set the database for our GS2 macro resolver
117 GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver)this.macro_resolver;
118 gs2_macro_resolver.setDB(this.coll_db);
119
[10093]120 return true;
[8959]121 }
122
123 /** if id ends in .fc, .pc etc, then translate it to the correct id */
124 protected String translateId(String node_id) {
[22319]125 return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
[8959]126 }
127
128 /** if an id is not a greenstone id (an external id) then translate
129 it to a greenstone one*/
130 protected String translateExternalId(String node_id){
[15326]131 return this.coll_db.externalId2OID(node_id);
[8959]132 }
133
134 /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
135 protected String getRootId(String node_id) {
136 return OID.getTop(node_id);
137 }
138 /** returns a list of the child ids in order, null if no children */
139 protected ArrayList getChildrenIds(String node_id) {
[15326]140 DBInfo info = this.coll_db.getInfo(node_id);
[8959]141 if (info == null) {
142 return null;
143 }
144
145 String contains = info.getInfo("contains");
146 if (contains.equals("")) {
147 return null;
148 }
149 ArrayList children = new ArrayList();
150 StringTokenizer st = new StringTokenizer(contains, ";");
151 while (st.hasMoreTokens()) {
152 String child_id = st.nextToken().replaceAll("\"", node_id);
153 children.add(child_id);
154 }
155 return children;
156
157 }
158 /** returns the node id of the parent node, null if no parent */
159 protected String getParentId(String node_id){
160 String parent = OID.getParent(node_id);
161 if (parent.equals(node_id)) {
162 return null;
163 }
164 return parent;
165 }
166
167 /** get the metadata for the classifier node node_id
168 * returns a metadataList element:
169 * <metadataList><metadata name="xxx">value</metadata></metadataList>
170 */
171 // assumes only one value per metadata
172 protected Element getMetadataList(String node_id, boolean all_metadata,
[9874]173 ArrayList metadata_names)
174 throws GSException {
[8959]175 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
[15326]176 DBInfo info = this.coll_db.getInfo(node_id);
[8959]177 if (info == null) {
178 return null;
179 }
180 String lang = "en"; // why do we need this??
181 if (all_metadata) {
182 // return everything out of the database
183 Set keys = info.getKeys();
184 Iterator it = keys.iterator();
185 while(it.hasNext()) {
186 String key = (String)it.next();
[10334]187 //String value = info.getInfo(key);
188 Vector values = info.getMultiInfo(key);
189 for(int i=0; i<values.size(); i++) {
190 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
191 }
[8959]192 }
193
194 } else {
195 for (int i=0; i<metadata_names.size(); i++) {
[14035]196 String meta_name = (String) metadata_names.get(i);
[8959]197 String value = getMetadata(node_id, info, meta_name, lang);
198 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
199 }
200 }
201 return metadata_list;
202 }
203
204 /** returns the structural information asked for.
205 * info_type may be one of
206 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
207 */
208 protected String getStructureInfo(String doc_id, String info_type) {
209 String value="";
210 if (info_type.equals(INFO_NUM_SIBS)) {
211 String parent_id = OID.getParent(doc_id);
212 if (parent_id.equals(doc_id)) {
213 value="0";
214 } else {
215 value = String.valueOf(getNumChildren(parent_id));
216 }
217 return value;
218 }
219
220 if (info_type.equals(INFO_NUM_CHILDREN)) {
221 return String.valueOf(getNumChildren(doc_id));
222 }
223
224
225 if (info_type.equals(INFO_SIB_POS)) {
226 String parent_id = OID.getParent(doc_id);
227 if (parent_id.equals(doc_id)) {
228 return "-1";
229 }
230
[15326]231 DBInfo info = this.coll_db.getInfo(parent_id);
[8959]232 if (info==null) {
233 return "-1";
234 }
235
236 String contains = info.getInfo("contains");
237 contains = contains.replaceAll("\"", parent_id);
238 String [] children = contains.split(";");
239 for (int i=0;i<children.length;i++) {
240 String child_id = children[i];
241 if (child_id.equals(doc_id)) {
242 return String.valueOf(i+1); // make it from 1 to length
243
244 }
245 }
246
247 return "-1";
248 } else {
249 return null;
250 }
251
252 }
253
254 protected int getNumChildren(String node_id) {
[15326]255 DBInfo info = this.coll_db.getInfo(node_id);
[8959]256 if (info == null) {
257 return 0;
258 }
259 String contains = info.getInfo("contains");
260 if (contains.equals("")) {
261 return 0;
262 }
263 String [] children = contains.split(";");
264 return children.length;
265 }
266
267 /** returns the document type of the doc that the specified node
268 belongs to. should be one of
269 GSXML.DOC_TYPE_SIMPLE,
270 GSXML.DOC_TYPE_PAGED,
271 GSXML.DOC_TYPE_HIERARCHY
272 */
273 protected String getDocType(String node_id) {
[15326]274 DBInfo info = this.coll_db.getInfo(node_id);
[8959]275 if (info == null) {
276 return GSXML.DOC_TYPE_SIMPLE;
277 }
278 String doc_type = info.getInfo("doctype");
279 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
280 return doc_type;
281 }
282
283 String top_id = OID.getTop(node_id);
284 boolean is_top = (top_id.equals(node_id) ? true : false);
285
286 String children = info.getInfo("contains");
287 boolean is_leaf = (children.equals("") ? true : false);
288
289 if (is_top && is_leaf) { // a single section document
290 return GSXML.DOC_TYPE_SIMPLE;
291 }
292
293 // now we just check the top node
294 if (!is_top) { // we need to look at the top info
[15326]295 info = this.coll_db.getInfo(top_id);
[8959]296 }
297 if (info == null) {
298 return GSXML.DOC_TYPE_HIERARCHY;
299 }
300
301 String childtype = info.getInfo("childtype");
302 if (childtype.equals("Paged")) {
303 return GSXML.DOC_TYPE_PAGED;
304 }
305 return GSXML.DOC_TYPE_HIERARCHY;
306 }
307
308 /** returns the content of a node
309 * should return a nodeContent element:
310 * <nodeContent>text content or other elements</nodeContent>
311 */
[13575]312 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
[8959]313
314 protected String getMetadata(String node_id, DBInfo info,
315 String metadata, String lang) {
316 boolean multiple = false;
317 String relation = "";
318 String separator = ", ";
319 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
320 if (pos ==-1) {
[14035]321 Vector values = info.getMultiInfo(metadata);
[14185]322 if (values !=null){
323 // just a plain meta entry eg dc.Title
324 StringBuffer result = new StringBuffer();
325 boolean first = true;
326 for (int i=0; i<values.size(); i++) {
327 if (first) {
328 first = false;
329 } else {
330 result.append(separator);
331 }
332 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
333 }
334 return result.toString();
335 }
336 else{
337 String result = info.getInfo(metadata);
338 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
339 }
[8959]340 }
[14035]341
[8959]342 String temp = metadata.substring(0, pos);
343 metadata = metadata.substring(pos+1);
344 // check for all on the front
345 if (temp.equals("all")) {
346 multiple=true;
347 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
348 if (pos ==-1) {
349 temp = "";
350 } else {
351 temp = metadata.substring(0, pos);
352 metadata = metadata.substring(pos+1);
353 }
354 }
355
356 // now check for relational info
[15208]357 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendants"
[8959]358 relation = temp;
359 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
360 if (pos == -1) {
361 temp = "";
362 } else {
363 temp = metadata.substring(0, pos);
364 metadata = metadata.substring(pos+1);
365 }
366 }
367
368 // now look for separator info
369 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
370 separator = temp.substring(1, temp.length()-1);
371
372 }
373
374 String relation_id = node_id;
375 if (relation.equals("parent") || relation.equals("ancestors")) {
376 relation_id = OID.getParent(node_id);
377 // parent or ancestor does not include self
378 if (relation_id.equals(node_id)){
379 return "";
380 }
381 } else if (relation.equals("root")) {
382 relation_id = OID.getTop(node_id);
383 }
384
385 // now we either have a single node, or we have ancestors
386 DBInfo relation_info;
387 if (relation_id.equals(node_id)) {
388 relation_info = info;
389 } else {
[15326]390 relation_info = this.coll_db.getInfo(relation_id);
[8959]391 }
392 if (relation_info == null) {
393 return "";
394 }
395
396 StringBuffer result = new StringBuffer();
397
398 if (!multiple) {
399 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
400 } else {
401 // we have multiple meta
402 Vector values = relation_info.getMultiInfo(metadata);
[14035]403 if (values != null) {
[8959]404 boolean first = true;
405 for (int i=0; i<values.size(); i++) {
406 if (first) {
407 first = false;
408 } else {
409 result.append(separator);
410 }
411 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
412 }
413 }
[14035]414 logger.info(result);
[8959]415 }
416 // if not ancestors, then this is all we do
417 if (!relation.equals("ancestors")) {
418 return result.toString();
419 }
420
421 // now do the ancestors
422 String current_id = relation_id;
423 relation_id = OID.getParent(current_id);
424 while (!relation_id.equals(current_id)) {
[15326]425 relation_info = this.coll_db.getInfo(relation_id);
[8959]426 if (relation_info == null) return result.toString();
427 if (!multiple) {
428 result.insert(0, separator);
429 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
430 } else {
431 Vector values = relation_info.getMultiInfo(metadata);
432 if (values != null) {
433 for (int i=values.size()-1; i>=0; i--) {
434 result.insert(0, separator);
435 result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
436 }
437 }
438
439 }
440 current_id = relation_id;
441 relation_id = OID.getParent(current_id);
442 }
443 return result.toString();
444 }
445
446
[15326]447 /** needs to get info from collection database - if the calling code gets it already it may pay to pass it in instead */
[8959]448 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
449 {
450 // resolve any collection specific macros
451 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
452 return doc_content;
453 }
454
455 protected Element getInfo(String doc_id, String info_type) {
456
457 String value="";
458 if (info_type.equals(INFO_NUM_SIBS)) {
459 String parent_id = OID.getParent(doc_id);
460 if (parent_id.equals(doc_id)) {
461 value="0";
462 } else {
463 value = String.valueOf(getNumChildren(parent_id));
464 }
465 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
466 value = String.valueOf(getNumChildren(doc_id));
467 } else if (info_type.equals(INFO_SIB_POS)) {
468 String parent_id = OID.getParent(doc_id);
469 if (parent_id.equals(doc_id)) {
470 value="-1";
471 } else {
[15326]472 DBInfo info = this.coll_db.getInfo(parent_id);
[8959]473 if (info==null) {
474 value ="-1";
475 } else {
476 String contains = info.getInfo("contains");
477 contains = contains.replaceAll("\"", parent_id);
478 String [] children = contains.split(";");
479 for (int i=0;i<children.length;i++) {
480 String child_id = children[i];
481 if (child_id.equals(doc_id)) {
482 value = String.valueOf(i+1); // make it from 1 to length
483 break;
484 }
485 }
486 }
487 }
488 } else {
489 return null;
490 }
491 Element info_elem = this.doc.createElement("info");
492 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
493 info_elem.setAttribute(GSXML.VALUE_ATT, value);
494 return info_elem;
495 }
496
[14527]497 protected String getHrefOID(String href_url){
[15326]498 return this.coll_db.docnum2OID(href_url);
[14527]499 }
500
[8959]501}
Note: See TracBrowser for help on using the repository browser.