source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 16810

Last change on this file since 16810 was 16810, checked in by ak19, 16 years ago

GDBMWrapper converts the key string to UTF8 now before doing the lookup. The href_url key is no longer URLencoded by HTMLPlugin, which means it is in UTF8. Therefore getHrefOID() here does not need to do any corresponding URL encoding of the key for the database lookup either.

  • Property svn:keywords set to Author Date Id Revision
File size: 15.5 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46/** Implements the generic retrieval and classifier services for GS2
47 * collections.
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
51 */
52
53public abstract class AbstractGS2DocumentRetrieve
54 extends AbstractDocumentRetrieve {
55
56 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
57
58 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
59 protected String index_stem = null;
60
61 protected SimpleCollectionDatabase coll_db = null;
62
63
/**
 * Installs a GS2 macro resolver on this service. The resolver is handed
 * its collection database later, in configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    super();
    macro_resolver = new GS2MacroResolver();
}
69
70 public void cleanUp() {
71 super.cleanUp();
72 this.coll_db.closeDatabase();
73 }
74 /** configure this service */
75 public boolean configure(Element info, Element extra_info)
76 {
77 if (!super.configure(info, extra_info)){
78 return false;
79 }
80
81 logger.info("Configuring AbstractGS2DocumentRetrieve...");
82 //this.config_info = info;
83
84 // the index stem is either specified in the config file or is the collection name
85 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
86 if (index_stem_elem != null) {
87 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
88 }
89 if (this.index_stem == null || this.index_stem.equals("")) {
90 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
91 this.index_stem = this.cluster_name;
92 }
93
94 // find out what kind of database we have
95 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
96 String database_type = null;
97 if (database_type_elem != null) {
98 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
99 }
100 if (database_type == null || database_type.equals("")) {
101 database_type = "gdbm"; // the default
102 }
103 coll_db = new SimpleCollectionDatabase(database_type);
104 if (coll_db == null) {
105 logger.error("Couldn't create the collection database of type "+database_type);
106 return false;
107 }
108
109 // Open database for querying
110 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
111 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
112 logger.error("Could not open collection database!");
113 return false;
114 }
115
116 // we need to set the database for our GS2 macro resolver
117 GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver)this.macro_resolver;
118 gs2_macro_resolver.setDB(this.coll_db);
119
120 return true;
121 }
122
123 /** if id ends in .fc, .pc etc, then translate it to the correct id */
124 protected String translateId(String node_id) {
125 return this.coll_db.translateOID(node_id);
126 }
127
128 /** if an id is not a greenstone id (an external id) then translate
129 it to a greenstone one*/
130 protected String translateExternalId(String node_id){
131 return this.coll_db.externalId2OID(node_id);
132 }
133
134 /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
135 protected String getRootId(String node_id) {
136 return OID.getTop(node_id);
137 }
138 /** returns a list of the child ids in order, null if no children */
139 protected ArrayList getChildrenIds(String node_id) {
140 DBInfo info = this.coll_db.getInfo(node_id);
141 if (info == null) {
142 return null;
143 }
144
145 String contains = info.getInfo("contains");
146 if (contains.equals("")) {
147 return null;
148 }
149 ArrayList children = new ArrayList();
150 StringTokenizer st = new StringTokenizer(contains, ";");
151 while (st.hasMoreTokens()) {
152 String child_id = st.nextToken().replaceAll("\"", node_id);
153 children.add(child_id);
154 }
155 return children;
156
157 }
158 /** returns the node id of the parent node, null if no parent */
159 protected String getParentId(String node_id){
160 String parent = OID.getParent(node_id);
161 if (parent.equals(node_id)) {
162 return null;
163 }
164 return parent;
165 }
166
167 /** get the metadata for the classifier node node_id
168 * returns a metadataList element:
169 * <metadataList><metadata name="xxx">value</metadata></metadataList>
170 */
171 // assumes only one value per metadata
172 protected Element getMetadataList(String node_id, boolean all_metadata,
173 ArrayList metadata_names)
174 throws GSException {
175 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
176 DBInfo info = this.coll_db.getInfo(node_id);
177 if (info == null) {
178 return null;
179 }
180 String lang = "en"; // why do we need this??
181 if (all_metadata) {
182 // return everything out of the database
183 Set keys = info.getKeys();
184 Iterator it = keys.iterator();
185 while(it.hasNext()) {
186 String key = (String)it.next();
187 //String value = info.getInfo(key);
188 Vector values = info.getMultiInfo(key);
189 for(int i=0; i<values.size(); i++) {
190 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
191 }
192 }
193
194 } else {
195 for (int i=0; i<metadata_names.size(); i++) {
196 String meta_name = (String) metadata_names.get(i);
197 String value = getMetadata(node_id, info, meta_name, lang);
198 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
199 }
200 }
201 return metadata_list;
202 }
203
204 /** returns the structural information asked for.
205 * info_type may be one of
206 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
207 */
208 protected String getStructureInfo(String doc_id, String info_type) {
209 String value="";
210 if (info_type.equals(INFO_NUM_SIBS)) {
211 String parent_id = OID.getParent(doc_id);
212 if (parent_id.equals(doc_id)) {
213 value="0";
214 } else {
215 value = String.valueOf(getNumChildren(parent_id));
216 }
217 return value;
218 }
219
220 if (info_type.equals(INFO_NUM_CHILDREN)) {
221 return String.valueOf(getNumChildren(doc_id));
222 }
223
224
225 if (info_type.equals(INFO_SIB_POS)) {
226 String parent_id = OID.getParent(doc_id);
227 if (parent_id.equals(doc_id)) {
228 return "-1";
229 }
230
231 DBInfo info = this.coll_db.getInfo(parent_id);
232 if (info==null) {
233 return "-1";
234 }
235
236 String contains = info.getInfo("contains");
237 contains = contains.replaceAll("\"", parent_id);
238 String [] children = contains.split(";");
239 for (int i=0;i<children.length;i++) {
240 String child_id = children[i];
241 if (child_id.equals(doc_id)) {
242 return String.valueOf(i+1); // make it from 1 to length
243
244 }
245 }
246
247 return "-1";
248 } else {
249 return null;
250 }
251
252 }
253
254 protected int getNumChildren(String node_id) {
255 DBInfo info = this.coll_db.getInfo(node_id);
256 if (info == null) {
257 return 0;
258 }
259 String contains = info.getInfo("contains");
260 if (contains.equals("")) {
261 return 0;
262 }
263 String [] children = contains.split(";");
264 return children.length;
265 }
266
267 /** returns the document type of the doc that the specified node
268 belongs to. should be one of
269 GSXML.DOC_TYPE_SIMPLE,
270 GSXML.DOC_TYPE_PAGED,
271 GSXML.DOC_TYPE_HIERARCHY
272 */
273 protected String getDocType(String node_id) {
274 DBInfo info = this.coll_db.getInfo(node_id);
275 if (info == null) {
276 return GSXML.DOC_TYPE_SIMPLE;
277 }
278 String doc_type = info.getInfo("doctype");
279 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
280 return doc_type;
281 }
282
283 String top_id = OID.getTop(node_id);
284 boolean is_top = (top_id.equals(node_id) ? true : false);
285
286 String children = info.getInfo("contains");
287 boolean is_leaf = (children.equals("") ? true : false);
288
289 if (is_top && is_leaf) { // a single section document
290 return GSXML.DOC_TYPE_SIMPLE;
291 }
292
293 // now we just check the top node
294 if (!is_top) { // we need to look at the top info
295 info = this.coll_db.getInfo(top_id);
296 }
297 if (info == null) {
298 return GSXML.DOC_TYPE_HIERARCHY;
299 }
300
301 String childtype = info.getInfo("childtype");
302 if (childtype.equals("Paged")) {
303 return GSXML.DOC_TYPE_PAGED;
304 }
305 return GSXML.DOC_TYPE_HIERARCHY;
306 }
307
308 /** returns the content of a node
309 * should return a nodeContent element:
310 * <nodeContent>text content or other elements</nodeContent>
311 */
312 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
313
314 protected String getMetadata(String node_id, DBInfo info,
315 String metadata, String lang) {
316 boolean multiple = false;
317 String relation = "";
318 String separator = ", ";
319 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
320 if (pos ==-1) {
321 Vector values = info.getMultiInfo(metadata);
322 if (values !=null){
323 // just a plain meta entry eg dc.Title
324 StringBuffer result = new StringBuffer();
325 boolean first = true;
326 for (int i=0; i<values.size(); i++) {
327 if (first) {
328 first = false;
329 } else {
330 result.append(separator);
331 }
332 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
333 }
334 return result.toString();
335 }
336 else{
337 String result = info.getInfo(metadata);
338 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
339 }
340 }
341
342 String temp = metadata.substring(0, pos);
343 metadata = metadata.substring(pos+1);
344 // check for all on the front
345 if (temp.equals("all")) {
346 multiple=true;
347 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
348 if (pos ==-1) {
349 temp = "";
350 } else {
351 temp = metadata.substring(0, pos);
352 metadata = metadata.substring(pos+1);
353 }
354 }
355
356 // now check for relational info
357 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendants"
358 relation = temp;
359 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
360 if (pos == -1) {
361 temp = "";
362 } else {
363 temp = metadata.substring(0, pos);
364 metadata = metadata.substring(pos+1);
365 }
366 }
367
368 // now look for separator info
369 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
370 separator = temp.substring(1, temp.length()-1);
371
372 }
373
374 String relation_id = node_id;
375 if (relation.equals("parent") || relation.equals("ancestors")) {
376 relation_id = OID.getParent(node_id);
377 // parent or ancestor does not include self
378 if (relation_id.equals(node_id)){
379 return "";
380 }
381 } else if (relation.equals("root")) {
382 relation_id = OID.getTop(node_id);
383 }
384
385 // now we either have a single node, or we have ancestors
386 DBInfo relation_info;
387 if (relation_id.equals(node_id)) {
388 relation_info = info;
389 } else {
390 relation_info = this.coll_db.getInfo(relation_id);
391 }
392 if (relation_info == null) {
393 return "";
394 }
395
396 StringBuffer result = new StringBuffer();
397
398 if (!multiple) {
399 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
400 } else {
401 // we have multiple meta
402 Vector values = relation_info.getMultiInfo(metadata);
403 if (values != null) {
404 boolean first = true;
405 for (int i=0; i<values.size(); i++) {
406 if (first) {
407 first = false;
408 } else {
409 result.append(separator);
410 }
411 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
412 }
413 }
414 logger.info(result);
415 }
416 // if not ancestors, then this is all we do
417 if (!relation.equals("ancestors")) {
418 return result.toString();
419 }
420
421 // now do the ancestors
422 String current_id = relation_id;
423 relation_id = OID.getParent(current_id);
424 while (!relation_id.equals(current_id)) {
425 relation_info = this.coll_db.getInfo(relation_id);
426 if (relation_info == null) return result.toString();
427 if (!multiple) {
428 result.insert(0, separator);
429 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
430 } else {
431 Vector values = relation_info.getMultiInfo(metadata);
432 if (values != null) {
433 for (int i=values.size()-1; i>=0; i--) {
434 result.insert(0, separator);
435 result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
436 }
437 }
438
439 }
440 current_id = relation_id;
441 relation_id = OID.getParent(current_id);
442 }
443 return result.toString();
444 }
445
446
447 /** needs to get info from collection database - if the calling code gets it already it may pay to pass it in instead */
448 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
449 {
450 // resolve any collection specific macros
451 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
452 return doc_content;
453 }
454
455 protected Element getInfo(String doc_id, String info_type) {
456
457 String value="";
458 if (info_type.equals(INFO_NUM_SIBS)) {
459 String parent_id = OID.getParent(doc_id);
460 if (parent_id.equals(doc_id)) {
461 value="0";
462 } else {
463 value = String.valueOf(getNumChildren(parent_id));
464 }
465 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
466 value = String.valueOf(getNumChildren(doc_id));
467 } else if (info_type.equals(INFO_SIB_POS)) {
468 String parent_id = OID.getParent(doc_id);
469 if (parent_id.equals(doc_id)) {
470 value="-1";
471 } else {
472 DBInfo info = this.coll_db.getInfo(parent_id);
473 if (info==null) {
474 value ="-1";
475 } else {
476 String contains = info.getInfo("contains");
477 contains = contains.replaceAll("\"", parent_id);
478 String [] children = contains.split(";");
479 for (int i=0;i<children.length;i++) {
480 String child_id = children[i];
481 if (child_id.equals(doc_id)) {
482 value = String.valueOf(i+1); // make it from 1 to length
483 break;
484 }
485 }
486 }
487 }
488 } else {
489 return null;
490 }
491 Element info_elem = this.doc.createElement("info");
492 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
493 info_elem.setAttribute(GSXML.VALUE_ATT, value);
494 return info_elem;
495 }
496
497 protected String getHrefOID(String href_url){
498 return this.coll_db.docnum2OID(href_url);
499 }
500
501}
Note: See TracBrowser for help on using the repository browser.