source: main/branches/64_bit_Greenstone/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 24007

Last change on this file since 24007 was 24007, checked in by sjm84, 13 years ago

Updating this branch to match the latest Greenstone3 changes

  • Property svn:keywords set to Author Date Id Revision
File size: 15.6 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46// Apache Commons
47import org.apache.commons.lang3.*;
48
49/** Implements the generic retrieval and classifier services for GS2
50 * collections.
51 *
52 * @author Katherine Don
53 * @author Michael Dewsnip
54 */
55
56public abstract class AbstractGS2DocumentRetrieve
57 extends AbstractDocumentRetrieve {
58
59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
60
61 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
62 protected String index_stem = null;
63
64 protected SimpleCollectionDatabase coll_db = null;
65
66
67 /** constructor */
68 protected AbstractGS2DocumentRetrieve()
69 {
70 this.macro_resolver = new GS2MacroResolver();
71 }
72
73 public void cleanUp() {
74 super.cleanUp();
75 this.coll_db.closeDatabase();
76 }
77 /** configure this service */
78 public boolean configure(Element info, Element extra_info)
79 {
80 if (!super.configure(info, extra_info)){
81 return false;
82 }
83
84 logger.info("Configuring AbstractGS2DocumentRetrieve...");
85 //this.config_info = info;
86
87 // the index stem is either specified in the config file or is the collection name
88 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
89 if (index_stem_elem != null) {
90 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
91 }
92 if (this.index_stem == null || this.index_stem.equals("")) {
93 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
94 this.index_stem = this.cluster_name;
95 }
96
97 // find out what kind of database we have
98 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
99 String database_type = null;
100 if (database_type_elem != null) {
101 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
102 }
103 if (database_type == null || database_type.equals("")) {
104 database_type = "gdbm"; // the default
105 }
106 coll_db = new SimpleCollectionDatabase(database_type);
107 if (!coll_db.databaseOK()) {
108 logger.error("Couldn't create the collection database of type "+database_type);
109 return false;
110 }
111
112 // Open database for querying
113 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
114 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
115 logger.error("Could not open collection database!");
116 return false;
117 }
118
119 // we need to set the database for our GS2 macro resolver
120 GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver)this.macro_resolver;
121 gs2_macro_resolver.setDB(this.coll_db);
122
123 return true;
124 }
125
126 /** if id ends in .fc, .pc etc, then translate it to the correct id */
127 protected String translateId(String node_id) {
128 return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
129 }
130
131 /** if an id is not a greenstone id (an external id) then translate
132 it to a greenstone one*/
133 protected String translateExternalId(String node_id){
134 return this.coll_db.externalId2OID(node_id);
135 }
136
137 /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
138 protected String getRootId(String node_id) {
139 return OID.getTop(node_id);
140 }
141 /** returns a list of the child ids in order, null if no children */
142 protected ArrayList getChildrenIds(String node_id) {
143 DBInfo info = this.coll_db.getInfo(node_id);
144 if (info == null) {
145 return null;
146 }
147
148 String contains = info.getInfo("contains");
149 if (contains.equals("")) {
150 return null;
151 }
152 ArrayList children = new ArrayList();
153 StringTokenizer st = new StringTokenizer(contains, ";");
154 while (st.hasMoreTokens()) {
155 String child_id = StringUtils.replace(st.nextToken(), "\"", node_id);
156 children.add(child_id);
157 }
158 return children;
159
160 }
161 /** returns the node id of the parent node, null if no parent */
162 protected String getParentId(String node_id){
163 String parent = OID.getParent(node_id);
164 if (parent.equals(node_id)) {
165 return null;
166 }
167 return parent;
168 }
169
170 /** get the metadata for the classifier node node_id
171 * returns a metadataList element:
172 * <metadataList><metadata name="xxx">value</metadata></metadataList>
173 */
174 // assumes only one value per metadata
175 protected Element getMetadataList(String node_id, boolean all_metadata,
176 ArrayList metadata_names)
177 throws GSException {
178 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
179 DBInfo info = this.coll_db.getInfo(node_id);
180 if (info == null) {
181 return null;
182 }
183 String lang = "en"; // why do we need this??
184 if (all_metadata) {
185 // return everything out of the database
186 Set keys = info.getKeys();
187 Iterator it = keys.iterator();
188 while(it.hasNext()) {
189 String key = (String)it.next();
190 //String value = info.getInfo(key);
191 Vector values = info.getMultiInfo(key);
192 for(int i=0; i<values.size(); i++) {
193 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
194 }
195 }
196
197 } else {
198 for (int i=0; i<metadata_names.size(); i++) {
199 String meta_name = (String) metadata_names.get(i);
200 String value = getMetadata(node_id, info, meta_name, lang);
201 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
202 }
203 }
204 return metadata_list;
205 }
206
207 /** returns the structural information asked for.
208 * info_type may be one of
209 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
210 */
211 protected String getStructureInfo(String doc_id, String info_type) {
212 String value="";
213 if (info_type.equals(INFO_NUM_SIBS)) {
214 String parent_id = OID.getParent(doc_id);
215 if (parent_id.equals(doc_id)) {
216 value="0";
217 } else {
218 value = String.valueOf(getNumChildren(parent_id));
219 }
220 return value;
221 }
222
223 if (info_type.equals(INFO_NUM_CHILDREN)) {
224 return String.valueOf(getNumChildren(doc_id));
225 }
226
227
228 if (info_type.equals(INFO_SIB_POS)) {
229 String parent_id = OID.getParent(doc_id);
230 if (parent_id.equals(doc_id)) {
231 return "-1";
232 }
233
234 DBInfo info = this.coll_db.getInfo(parent_id);
235 if (info==null) {
236 return "-1";
237 }
238
239 String contains = info.getInfo("contains");
240 contains = StringUtils.replace(contains, "\"", parent_id);
241 String [] children = contains.split(";");
242 for (int i=0;i<children.length;i++) {
243 String child_id = children[i];
244 if (child_id.equals(doc_id)) {
245 return String.valueOf(i+1); // make it from 1 to length
246
247 }
248 }
249
250 return "-1";
251 } else {
252 return null;
253 }
254
255 }
256
257 protected int getNumChildren(String node_id) {
258 DBInfo info = this.coll_db.getInfo(node_id);
259 if (info == null) {
260 return 0;
261 }
262 String contains = info.getInfo("contains");
263 if (contains.equals("")) {
264 return 0;
265 }
266 String [] children = contains.split(";");
267 return children.length;
268 }
269
270 /** returns the document type of the doc that the specified node
271 belongs to. should be one of
272 GSXML.DOC_TYPE_SIMPLE,
273 GSXML.DOC_TYPE_PAGED,
274 GSXML.DOC_TYPE_HIERARCHY
275 */
276 protected String getDocType(String node_id) {
277 DBInfo info = this.coll_db.getInfo(node_id);
278 if (info == null) {
279 return GSXML.DOC_TYPE_SIMPLE;
280 }
281 String doc_type = info.getInfo("doctype");
282 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
283 return doc_type;
284 }
285
286 String top_id = OID.getTop(node_id);
287 boolean is_top = (top_id.equals(node_id) ? true : false);
288
289 String children = info.getInfo("contains");
290 boolean is_leaf = (children.equals("") ? true : false);
291
292 if (is_top && is_leaf) { // a single section document
293 return GSXML.DOC_TYPE_SIMPLE;
294 }
295
296 // now we just check the top node
297 if (!is_top) { // we need to look at the top info
298 info = this.coll_db.getInfo(top_id);
299 }
300 if (info == null) {
301 return GSXML.DOC_TYPE_HIERARCHY;
302 }
303
304 String childtype = info.getInfo("childtype");
305 if (childtype.equals("Paged")) {
306 return GSXML.DOC_TYPE_PAGED;
307 }
308 return GSXML.DOC_TYPE_HIERARCHY;
309 }
310
311 /** returns the content of a node
312 * should return a nodeContent element:
313 * <nodeContent>text content or other elements</nodeContent>
314 */
315 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
316
317 protected String getMetadata(String node_id, DBInfo info,
318 String metadata, String lang) {
319 boolean multiple = false;
320 String relation = "";
321 String separator = ", ";
322 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
323 if (pos ==-1) {
324 Vector values = info.getMultiInfo(metadata);
325 if (values !=null){
326 // just a plain meta entry eg dc.Title
327 StringBuffer result = new StringBuffer();
328 boolean first = true;
329 for (int i=0; i<values.size(); i++) {
330 if (first) {
331 first = false;
332 } else {
333 result.append(separator);
334 }
335 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
336 }
337 return result.toString();
338 }
339 else{
340 String result = info.getInfo(metadata);
341 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
342 }
343 }
344
345 String temp = metadata.substring(0, pos);
346 metadata = metadata.substring(pos+1);
347 // check for all on the front
348 if (temp.equals("all")) {
349 multiple=true;
350 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
351 if (pos ==-1) {
352 temp = "";
353 } else {
354 temp = metadata.substring(0, pos);
355 metadata = metadata.substring(pos+1);
356 }
357 }
358
359 // now check for relational info
360 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendants"
361 relation = temp;
362 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
363 if (pos == -1) {
364 temp = "";
365 } else {
366 temp = metadata.substring(0, pos);
367 metadata = metadata.substring(pos+1);
368 }
369 }
370
371 // now look for separator info
372 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
373 separator = temp.substring(1, temp.length()-1);
374
375 }
376
377 String relation_id = node_id;
378 if (relation.equals("parent") || relation.equals("ancestors")) {
379 relation_id = OID.getParent(node_id);
380 // parent or ancestor does not include self
381 if (relation_id.equals(node_id)){
382 return "";
383 }
384 } else if (relation.equals("root")) {
385 relation_id = OID.getTop(node_id);
386 }
387
388 // now we either have a single node, or we have ancestors
389 DBInfo relation_info;
390 if (relation_id.equals(node_id)) {
391 relation_info = info;
392 } else {
393 relation_info = this.coll_db.getInfo(relation_id);
394 }
395 if (relation_info == null) {
396 return "";
397 }
398
399 StringBuffer result = new StringBuffer();
400
401 if (!multiple) {
402 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
403 } else {
404 // we have multiple meta
405 Vector values = relation_info.getMultiInfo(metadata);
406 if (values != null) {
407 boolean first = true;
408 for (int i=0; i<values.size(); i++) {
409 if (first) {
410 first = false;
411 } else {
412 result.append(separator);
413 }
414 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
415 }
416 }
417 logger.info(result);
418 }
419 // if not ancestors, then this is all we do
420 if (!relation.equals("ancestors")) {
421 return result.toString();
422 }
423
424 // now do the ancestors
425 String current_id = relation_id;
426 relation_id = OID.getParent(current_id);
427 while (!relation_id.equals(current_id)) {
428 relation_info = this.coll_db.getInfo(relation_id);
429 if (relation_info == null) return result.toString();
430 if (!multiple) {
431 result.insert(0, separator);
432 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
433 } else {
434 Vector values = relation_info.getMultiInfo(metadata);
435 if (values != null) {
436 for (int i=values.size()-1; i>=0; i--) {
437 result.insert(0, separator);
438 result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
439 }
440 }
441
442 }
443 current_id = relation_id;
444 relation_id = OID.getParent(current_id);
445 }
446 return result.toString();
447 }
448
449
450 /** needs to get info from collection database - if the calling code gets it already it may pay to pass it in instead */
451 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
452 {
453 // resolve any collection specific macros
454 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
455 return doc_content;
456 }
457
458 protected Element getInfo(String doc_id, String info_type) {
459
460 String value="";
461 if (info_type.equals(INFO_NUM_SIBS)) {
462 String parent_id = OID.getParent(doc_id);
463 if (parent_id.equals(doc_id)) {
464 value="0";
465 } else {
466 value = String.valueOf(getNumChildren(parent_id));
467 }
468 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
469 value = String.valueOf(getNumChildren(doc_id));
470 } else if (info_type.equals(INFO_SIB_POS)) {
471 String parent_id = OID.getParent(doc_id);
472 if (parent_id.equals(doc_id)) {
473 value="-1";
474 } else {
475 DBInfo info = this.coll_db.getInfo(parent_id);
476 if (info==null) {
477 value ="-1";
478 } else {
479 String contains = info.getInfo("contains");
480 contains = StringUtils.replace(contains, "\"", parent_id);
481 String [] children = contains.split(";");
482 for (int i=0;i<children.length;i++) {
483 String child_id = children[i];
484 if (child_id.equals(doc_id)) {
485 value = String.valueOf(i+1); // make it from 1 to length
486 break;
487 }
488 }
489 }
490 }
491 } else {
492 return null;
493 }
494 Element info_elem = this.doc.createElement("info");
495 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
496 info_elem.setAttribute(GSXML.VALUE_ATT, value);
497 return info_elem;
498 }
499
500 protected String getHrefOID(String href_url){
501 return this.coll_db.docnum2OID(href_url);
502 }
503
504}
Note: See TracBrowser for help on using the repository browser.