source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 15754

Last change on this file since 15754 was 15754, checked in by ak19, 16 years ago

Greenstone image macros get resolved again after the database change.

  • Property svn:keywords set to Author Date Id Revision
File size: 15.4 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46/** Implements the generic retrieval and classifier services for GS2
47 * collections.
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
51 */
52
53public abstract class AbstractGS2DocumentRetrieve
54 extends AbstractDocumentRetrieve {
55
56 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
57
58 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
59 protected String index_stem = null;
60
61 protected SimpleCollectionDatabase coll_db = null;
62
63
/**
 * Creates the service and installs a GS2MacroResolver as its macro
 * resolver. No collection database is attached at this point.
 */
protected AbstractGS2DocumentRetrieve()
{
    macro_resolver = new GS2MacroResolver();
}
69
70 public void cleanUp() {
71 super.cleanUp();
72 this.coll_db.closeDatabase();
73 }
74 /** configure this service */
75 public boolean configure(Element info, Element extra_info)
76 {
77 if (!super.configure(info, extra_info)){
78 return false;
79 }
80
81 logger.info("Configuring AbstractGS2DocumentRetrieve...");
82 //this.config_info = info;
83
84 // the index stem is either specified in the config file or is the collection name
85 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
86 if (index_stem_elem != null) {
87 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
88 }
89 if (this.index_stem == null || this.index_stem.equals("")) {
90 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
91 this.index_stem = this.cluster_name;
92 }
93
94 // find out what kind of database we have
95 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
96 String database_type = null;
97 if (database_type_elem != null) {
98 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
99 }
100 if (database_type == null || database_type.equals("")) {
101 database_type = "gdbm"; // the default
102 }
103 coll_db = new SimpleCollectionDatabase(database_type);
104 if (coll_db == null) {
105 logger.error("Couldn't create the collection database of type "+database_type);
106 return false;
107 }
108
109 // Open database for querying
110 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
111 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
112 logger.error("Could not open collection database!");
113 return false;
114 }
115 ((GS2MacroResolver)this.macro_resolver).setDB(this.coll_db);
116
117 return true;
118 }
119
120 /** if id ends in .fc, .pc etc, then translate it to the correct id */
121 protected String translateId(String node_id) {
122 return this.coll_db.translateOID(node_id);
123 }
124
125 /** if an id is not a greenstone id (an external id) then translate
126 it to a greenstone one*/
127 protected String translateExternalId(String node_id){
128 return this.coll_db.externalId2OID(node_id);
129 }
130
131 /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
132 protected String getRootId(String node_id) {
133 return OID.getTop(node_id);
134 }
135 /** returns a list of the child ids in order, null if no children */
136 protected ArrayList getChildrenIds(String node_id) {
137 DBInfo info = this.coll_db.getInfo(node_id);
138 if (info == null) {
139 return null;
140 }
141
142 String contains = info.getInfo("contains");
143 if (contains.equals("")) {
144 return null;
145 }
146 ArrayList children = new ArrayList();
147 StringTokenizer st = new StringTokenizer(contains, ";");
148 while (st.hasMoreTokens()) {
149 String child_id = st.nextToken().replaceAll("\"", node_id);
150 children.add(child_id);
151 }
152 return children;
153
154 }
155 /** returns the node id of the parent node, null if no parent */
156 protected String getParentId(String node_id){
157 String parent = OID.getParent(node_id);
158 if (parent.equals(node_id)) {
159 return null;
160 }
161 return parent;
162 }
163
164 /** get the metadata for the classifier node node_id
165 * returns a metadataList element:
166 * <metadataList><metadata name="xxx">value</metadata></metadataList>
167 */
168 // assumes only one value per metadata
169 protected Element getMetadataList(String node_id, boolean all_metadata,
170 ArrayList metadata_names)
171 throws GSException {
172 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
173 DBInfo info = this.coll_db.getInfo(node_id);
174 if (info == null) {
175 return null;
176 }
177 String lang = "en"; // why do we need this??
178 if (all_metadata) {
179 // return everything out of the database
180 Set keys = info.getKeys();
181 Iterator it = keys.iterator();
182 while(it.hasNext()) {
183 String key = (String)it.next();
184 //String value = info.getInfo(key);
185 Vector values = info.getMultiInfo(key);
186 for(int i=0; i<values.size(); i++) {
187 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
188 }
189 }
190
191 } else {
192 for (int i=0; i<metadata_names.size(); i++) {
193 String meta_name = (String) metadata_names.get(i);
194 String value = getMetadata(node_id, info, meta_name, lang);
195 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
196 }
197 }
198 return metadata_list;
199 }
200
201 /** returns the structural information asked for.
202 * info_type may be one of
203 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
204 */
205 protected String getStructureInfo(String doc_id, String info_type) {
206 String value="";
207 if (info_type.equals(INFO_NUM_SIBS)) {
208 String parent_id = OID.getParent(doc_id);
209 if (parent_id.equals(doc_id)) {
210 value="0";
211 } else {
212 value = String.valueOf(getNumChildren(parent_id));
213 }
214 return value;
215 }
216
217 if (info_type.equals(INFO_NUM_CHILDREN)) {
218 return String.valueOf(getNumChildren(doc_id));
219 }
220
221
222 if (info_type.equals(INFO_SIB_POS)) {
223 String parent_id = OID.getParent(doc_id);
224 if (parent_id.equals(doc_id)) {
225 return "-1";
226 }
227
228 DBInfo info = this.coll_db.getInfo(parent_id);
229 if (info==null) {
230 return "-1";
231 }
232
233 String contains = info.getInfo("contains");
234 contains = contains.replaceAll("\"", parent_id);
235 String [] children = contains.split(";");
236 for (int i=0;i<children.length;i++) {
237 String child_id = children[i];
238 if (child_id.equals(doc_id)) {
239 return String.valueOf(i+1); // make it from 1 to length
240
241 }
242 }
243
244 return "-1";
245 } else {
246 return null;
247 }
248
249 }
250
251 protected int getNumChildren(String node_id) {
252 DBInfo info = this.coll_db.getInfo(node_id);
253 if (info == null) {
254 return 0;
255 }
256 String contains = info.getInfo("contains");
257 if (contains.equals("")) {
258 return 0;
259 }
260 String [] children = contains.split(";");
261 return children.length;
262 }
263
264 /** returns the document type of the doc that the specified node
265 belongs to. should be one of
266 GSXML.DOC_TYPE_SIMPLE,
267 GSXML.DOC_TYPE_PAGED,
268 GSXML.DOC_TYPE_HIERARCHY
269 */
270 protected String getDocType(String node_id) {
271 DBInfo info = this.coll_db.getInfo(node_id);
272 if (info == null) {
273 return GSXML.DOC_TYPE_SIMPLE;
274 }
275 String doc_type = info.getInfo("doctype");
276 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
277 return doc_type;
278 }
279
280 String top_id = OID.getTop(node_id);
281 boolean is_top = (top_id.equals(node_id) ? true : false);
282
283 String children = info.getInfo("contains");
284 boolean is_leaf = (children.equals("") ? true : false);
285
286 if (is_top && is_leaf) { // a single section document
287 return GSXML.DOC_TYPE_SIMPLE;
288 }
289
290 // now we just check the top node
291 if (!is_top) { // we need to look at the top info
292 info = this.coll_db.getInfo(top_id);
293 }
294 if (info == null) {
295 return GSXML.DOC_TYPE_HIERARCHY;
296 }
297
298 String childtype = info.getInfo("childtype");
299 if (childtype.equals("Paged")) {
300 return GSXML.DOC_TYPE_PAGED;
301 }
302 return GSXML.DOC_TYPE_HIERARCHY;
303 }
304
/**
 * Returns the content of a node. Left to the concrete subclass.
 *
 * Implementations should return a nodeContent element:
 * <nodeContent>text content or other elements</nodeContent>
 *
 * @param doc_id the id of the node whose content is wanted
 * @param lang   the language passed in by the caller
 * @throws GSException declared for implementations to signal failure
 */
abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
310
311 protected String getMetadata(String node_id, DBInfo info,
312 String metadata, String lang) {
313 boolean multiple = false;
314 String relation = "";
315 String separator = ", ";
316 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
317 if (pos ==-1) {
318 Vector values = info.getMultiInfo(metadata);
319 if (values !=null){
320 // just a plain meta entry eg dc.Title
321 StringBuffer result = new StringBuffer();
322 boolean first = true;
323 for (int i=0; i<values.size(); i++) {
324 if (first) {
325 first = false;
326 } else {
327 result.append(separator);
328 }
329 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
330 }
331 return result.toString();
332 }
333 else{
334 String result = info.getInfo(metadata);
335 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
336 }
337 }
338
339 String temp = metadata.substring(0, pos);
340 metadata = metadata.substring(pos+1);
341 // check for all on the front
342 if (temp.equals("all")) {
343 multiple=true;
344 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
345 if (pos ==-1) {
346 temp = "";
347 } else {
348 temp = metadata.substring(0, pos);
349 metadata = metadata.substring(pos+1);
350 }
351 }
352
353 // now check for relational info
354 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendants"
355 relation = temp;
356 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
357 if (pos == -1) {
358 temp = "";
359 } else {
360 temp = metadata.substring(0, pos);
361 metadata = metadata.substring(pos+1);
362 }
363 }
364
365 // now look for separator info
366 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
367 separator = temp.substring(1, temp.length()-1);
368
369 }
370
371 String relation_id = node_id;
372 if (relation.equals("parent") || relation.equals("ancestors")) {
373 relation_id = OID.getParent(node_id);
374 // parent or ancestor does not include self
375 if (relation_id.equals(node_id)){
376 return "";
377 }
378 } else if (relation.equals("root")) {
379 relation_id = OID.getTop(node_id);
380 }
381
382 // now we either have a single node, or we have ancestors
383 DBInfo relation_info;
384 if (relation_id.equals(node_id)) {
385 relation_info = info;
386 } else {
387 relation_info = this.coll_db.getInfo(relation_id);
388 }
389 if (relation_info == null) {
390 return "";
391 }
392
393 StringBuffer result = new StringBuffer();
394
395 if (!multiple) {
396 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
397 } else {
398 // we have multiple meta
399 Vector values = relation_info.getMultiInfo(metadata);
400 if (values != null) {
401 boolean first = true;
402 for (int i=0; i<values.size(); i++) {
403 if (first) {
404 first = false;
405 } else {
406 result.append(separator);
407 }
408 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
409 }
410 }
411 logger.info(result);
412 }
413 // if not ancestors, then this is all we do
414 if (!relation.equals("ancestors")) {
415 return result.toString();
416 }
417
418 // now do the ancestors
419 String current_id = relation_id;
420 relation_id = OID.getParent(current_id);
421 while (!relation_id.equals(current_id)) {
422 relation_info = this.coll_db.getInfo(relation_id);
423 if (relation_info == null) return result.toString();
424 if (!multiple) {
425 result.insert(0, separator);
426 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
427 } else {
428 Vector values = relation_info.getMultiInfo(metadata);
429 if (values != null) {
430 for (int i=values.size()-1; i>=0; i--) {
431 result.insert(0, separator);
432 result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
433 }
434 }
435
436 }
437 current_id = relation_id;
438 relation_id = OID.getParent(current_id);
439 }
440 return result.toString();
441 }
442
443
444 /** needs to get info from collection database - if the calling code gets it already it may pay to pass it in instead */
445 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
446 {
447 // resolve any collection specific macros
448 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
449 return doc_content;
450 }
451
452 protected Element getInfo(String doc_id, String info_type) {
453
454 String value="";
455 if (info_type.equals(INFO_NUM_SIBS)) {
456 String parent_id = OID.getParent(doc_id);
457 if (parent_id.equals(doc_id)) {
458 value="0";
459 } else {
460 value = String.valueOf(getNumChildren(parent_id));
461 }
462 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
463 value = String.valueOf(getNumChildren(doc_id));
464 } else if (info_type.equals(INFO_SIB_POS)) {
465 String parent_id = OID.getParent(doc_id);
466 if (parent_id.equals(doc_id)) {
467 value="-1";
468 } else {
469 DBInfo info = this.coll_db.getInfo(parent_id);
470 if (info==null) {
471 value ="-1";
472 } else {
473 String contains = info.getInfo("contains");
474 contains = contains.replaceAll("\"", parent_id);
475 String [] children = contains.split(";");
476 for (int i=0;i<children.length;i++) {
477 String child_id = children[i];
478 if (child_id.equals(doc_id)) {
479 value = String.valueOf(i+1); // make it from 1 to length
480 break;
481 }
482 }
483 }
484 }
485 } else {
486 return null;
487 }
488 Element info_elem = this.doc.createElement("info");
489 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
490 info_elem.setAttribute(GSXML.VALUE_ATT, value);
491 return info_elem;
492 }
493
494 protected String getHrefOID(String href_url){
495 return this.coll_db.docnum2OID(href_url);
496 }
497
498}
Note: See TracBrowser for help on using the repository browser.