source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 15208

Last change on this file since 15208 was 15208, checked in by kjdon, 16 years ago

changed descendents to descendants to fit with other code (only in a comment, but may help later on)

  • Property svn:keywords set to Author Date Id Revision
File size: 14.8 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.GDBMWrapper;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46/** Implements the generic retrieval and classifier services for GS2
47 * collections.
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
51 */
52
53public abstract class AbstractGS2DocumentRetrieve
54 extends AbstractDocumentRetrieve {
55
56 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
57
58 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
59 protected String index_stem = null;
60
61 protected GDBMWrapper gdbm_src = null;
62
63
/**
 * Creates the (not yet opened) GDBM wrapper and installs a GS2 macro
 * resolver that is handed that same wrapper.
 */
protected AbstractGS2DocumentRetrieve()
{
    GDBMWrapper wrapper = new GDBMWrapper();
    this.gdbm_src = wrapper;
    this.macro_resolver = new GS2MacroResolver(wrapper);
}
70
71 public void cleanUp() {
72 super.cleanUp();
73 this.gdbm_src.closeDatabase();
74 }
75 /** configure this service */
76 public boolean configure(Element info, Element extra_info)
77 {
78 if (!super.configure(info, extra_info)){
79 return false;
80 }
81
82 logger.info("Configuring AbstractGS2DocumentRetrieve...");
83 //this.config_info = info;
84
85 // the index stem is either specified in the config file or is the collection name
86 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
87 if (index_stem_elem != null) {
88 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
89 }
90 if (this.index_stem == null || this.index_stem.equals("")) {
91 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
92 this.index_stem = this.cluster_name;
93 }
94
95 // Open GDBM database for querying
96 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name, this.index_stem);
97 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
98 logger.error("Could not open GDBM database!");
99 return false;
100 }
101
102 return true;
103 }
104
105 /** if id ends in .fc, .pc etc, then translate it to the correct id */
106 protected String translateId(String node_id) {
107 return this.gdbm_src.translateOID(node_id);
108 }
109
110 /** if an id is not a greenstone id (an external id) then translate
111 it to a greenstone one*/
112 protected String translateExternalId(String node_id){
113 return this.gdbm_src.externalId2OID(node_id);
114 }
115
116 /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
117 protected String getRootId(String node_id) {
118 return OID.getTop(node_id);
119 }
120 /** returns a list of the child ids in order, null if no children */
121 protected ArrayList getChildrenIds(String node_id) {
122 DBInfo info = this.gdbm_src.getInfo(node_id);
123 if (info == null) {
124 return null;
125 }
126
127 String contains = info.getInfo("contains");
128 if (contains.equals("")) {
129 return null;
130 }
131 ArrayList children = new ArrayList();
132 StringTokenizer st = new StringTokenizer(contains, ";");
133 while (st.hasMoreTokens()) {
134 String child_id = st.nextToken().replaceAll("\"", node_id);
135 children.add(child_id);
136 }
137 return children;
138
139 }
140 /** returns the node id of the parent node, null if no parent */
141 protected String getParentId(String node_id){
142 String parent = OID.getParent(node_id);
143 if (parent.equals(node_id)) {
144 return null;
145 }
146 return parent;
147 }
148
149 /** get the metadata for the classifier node node_id
150 * returns a metadataList element:
151 * <metadataList><metadata name="xxx">value</metadata></metadataList>
152 */
153 // assumes only one value per metadata
154 protected Element getMetadataList(String node_id, boolean all_metadata,
155 ArrayList metadata_names)
156 throws GSException {
157 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
158 DBInfo info = this.gdbm_src.getInfo(node_id);
159 if (info == null) {
160 return null;
161 }
162 String lang = "en"; // why do we need this??
163 if (all_metadata) {
164 // return everything out of the database
165 Set keys = info.getKeys();
166 Iterator it = keys.iterator();
167 while(it.hasNext()) {
168 String key = (String)it.next();
169 //String value = info.getInfo(key);
170 Vector values = info.getMultiInfo(key);
171 for(int i=0; i<values.size(); i++) {
172 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
173 }
174 }
175
176 } else {
177 for (int i=0; i<metadata_names.size(); i++) {
178 String meta_name = (String) metadata_names.get(i);
179 String value = getMetadata(node_id, info, meta_name, lang);
180 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
181 }
182 }
183 return metadata_list;
184 }
185
186 /** returns the structural information asked for.
187 * info_type may be one of
188 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
189 */
190 protected String getStructureInfo(String doc_id, String info_type) {
191 String value="";
192 if (info_type.equals(INFO_NUM_SIBS)) {
193 String parent_id = OID.getParent(doc_id);
194 if (parent_id.equals(doc_id)) {
195 value="0";
196 } else {
197 value = String.valueOf(getNumChildren(parent_id));
198 }
199 return value;
200 }
201
202 if (info_type.equals(INFO_NUM_CHILDREN)) {
203 return String.valueOf(getNumChildren(doc_id));
204 }
205
206
207 if (info_type.equals(INFO_SIB_POS)) {
208 String parent_id = OID.getParent(doc_id);
209 if (parent_id.equals(doc_id)) {
210 return "-1";
211 }
212
213 DBInfo info = this.gdbm_src.getInfo(parent_id);
214 if (info==null) {
215 return "-1";
216 }
217
218 String contains = info.getInfo("contains");
219 contains = contains.replaceAll("\"", parent_id);
220 String [] children = contains.split(";");
221 for (int i=0;i<children.length;i++) {
222 String child_id = children[i];
223 if (child_id.equals(doc_id)) {
224 return String.valueOf(i+1); // make it from 1 to length
225
226 }
227 }
228
229 return "-1";
230 } else {
231 return null;
232 }
233
234 }
235
236 protected int getNumChildren(String node_id) {
237 DBInfo info = this.gdbm_src.getInfo(node_id);
238 if (info == null) {
239 return 0;
240 }
241 String contains = info.getInfo("contains");
242 if (contains.equals("")) {
243 return 0;
244 }
245 String [] children = contains.split(";");
246 return children.length;
247 }
248
249 /** returns the document type of the doc that the specified node
250 belongs to. should be one of
251 GSXML.DOC_TYPE_SIMPLE,
252 GSXML.DOC_TYPE_PAGED,
253 GSXML.DOC_TYPE_HIERARCHY
254 */
255 protected String getDocType(String node_id) {
256 DBInfo info = this.gdbm_src.getInfo(node_id);
257 if (info == null) {
258 return GSXML.DOC_TYPE_SIMPLE;
259 }
260 String doc_type = info.getInfo("doctype");
261 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
262 return doc_type;
263 }
264
265 String top_id = OID.getTop(node_id);
266 boolean is_top = (top_id.equals(node_id) ? true : false);
267
268 String children = info.getInfo("contains");
269 boolean is_leaf = (children.equals("") ? true : false);
270
271 if (is_top && is_leaf) { // a single section document
272 return GSXML.DOC_TYPE_SIMPLE;
273 }
274
275 // now we just check the top node
276 if (!is_top) { // we need to look at the top info
277 info = this.gdbm_src.getInfo(top_id);
278 }
279 if (info == null) {
280 return GSXML.DOC_TYPE_HIERARCHY;
281 }
282
283 String childtype = info.getInfo("childtype");
284 if (childtype.equals("Paged")) {
285 return GSXML.DOC_TYPE_PAGED;
286 }
287 return GSXML.DOC_TYPE_HIERARCHY;
288 }
289
290 /** returns the content of a node
291 * should return a nodeContent element:
292 * <nodeContent>text content or other elements</nodeContent>
293 */
294 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
295
296 protected String getMetadata(String node_id, DBInfo info,
297 String metadata, String lang) {
298 boolean multiple = false;
299 String relation = "";
300 String separator = ", ";
301 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
302 if (pos ==-1) {
303 Vector values = info.getMultiInfo(metadata);
304 if (values !=null){
305 // just a plain meta entry eg dc.Title
306 StringBuffer result = new StringBuffer();
307 boolean first = true;
308 for (int i=0; i<values.size(); i++) {
309 if (first) {
310 first = false;
311 } else {
312 result.append(separator);
313 }
314 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
315 }
316 return result.toString();
317 }
318 else{
319 String result = info.getInfo(metadata);
320 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
321 }
322 }
323
324 String temp = metadata.substring(0, pos);
325 metadata = metadata.substring(pos+1);
326 // check for all on the front
327 if (temp.equals("all")) {
328 multiple=true;
329 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
330 if (pos ==-1) {
331 temp = "";
332 } else {
333 temp = metadata.substring(0, pos);
334 metadata = metadata.substring(pos+1);
335 }
336 }
337
338 // now check for relational info
339 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendants"
340 relation = temp;
341 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
342 if (pos == -1) {
343 temp = "";
344 } else {
345 temp = metadata.substring(0, pos);
346 metadata = metadata.substring(pos+1);
347 }
348 }
349
350 // now look for separator info
351 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
352 separator = temp.substring(1, temp.length()-1);
353
354 }
355
356 String relation_id = node_id;
357 if (relation.equals("parent") || relation.equals("ancestors")) {
358 relation_id = OID.getParent(node_id);
359 // parent or ancestor does not include self
360 if (relation_id.equals(node_id)){
361 return "";
362 }
363 } else if (relation.equals("root")) {
364 relation_id = OID.getTop(node_id);
365 }
366
367 // now we either have a single node, or we have ancestors
368 DBInfo relation_info;
369 if (relation_id.equals(node_id)) {
370 relation_info = info;
371 } else {
372 relation_info = this.gdbm_src.getInfo(relation_id);
373 }
374 if (relation_info == null) {
375 return "";
376 }
377
378 StringBuffer result = new StringBuffer();
379
380 if (!multiple) {
381 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
382 } else {
383 // we have multiple meta
384 Vector values = relation_info.getMultiInfo(metadata);
385 if (values != null) {
386 boolean first = true;
387 for (int i=0; i<values.size(); i++) {
388 if (first) {
389 first = false;
390 } else {
391 result.append(separator);
392 }
393 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
394 }
395 }
396 logger.info(result);
397 }
398 // if not ancestors, then this is all we do
399 if (!relation.equals("ancestors")) {
400 return result.toString();
401 }
402
403 // now do the ancestors
404 String current_id = relation_id;
405 relation_id = OID.getParent(current_id);
406 while (!relation_id.equals(current_id)) {
407 relation_info = this.gdbm_src.getInfo(relation_id);
408 if (relation_info == null) return result.toString();
409 if (!multiple) {
410 result.insert(0, separator);
411 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
412 } else {
413 Vector values = relation_info.getMultiInfo(metadata);
414 if (values != null) {
415 for (int i=values.size()-1; i>=0; i--) {
416 result.insert(0, separator);
417 result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
418 }
419 }
420
421 }
422 current_id = relation_id;
423 relation_id = OID.getParent(current_id);
424 }
425 return result.toString();
426 }
427
428
429 /** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
430 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
431 {
432 // resolve any collection specific macros
433 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
434 return doc_content;
435 }
436
437 protected Element getInfo(String doc_id, String info_type) {
438
439 String value="";
440 if (info_type.equals(INFO_NUM_SIBS)) {
441 String parent_id = OID.getParent(doc_id);
442 if (parent_id.equals(doc_id)) {
443 value="0";
444 } else {
445 value = String.valueOf(getNumChildren(parent_id));
446 }
447 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
448 value = String.valueOf(getNumChildren(doc_id));
449 } else if (info_type.equals(INFO_SIB_POS)) {
450 String parent_id = OID.getParent(doc_id);
451 if (parent_id.equals(doc_id)) {
452 value="-1";
453 } else {
454 DBInfo info = this.gdbm_src.getInfo(parent_id);
455 if (info==null) {
456 value ="-1";
457 } else {
458 String contains = info.getInfo("contains");
459 contains = contains.replaceAll("\"", parent_id);
460 String [] children = contains.split(";");
461 for (int i=0;i<children.length;i++) {
462 String child_id = children[i];
463 if (child_id.equals(doc_id)) {
464 value = String.valueOf(i+1); // make it from 1 to length
465 break;
466 }
467 }
468 }
469 }
470 } else {
471 return null;
472 }
473 Element info_elem = this.doc.createElement("info");
474 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
475 info_elem.setAttribute(GSXML.VALUE_ATT, value);
476 return info_elem;
477 }
478
479 protected String getHrefOID(String href_url){
480 return this.gdbm_src.docnum2OID(href_url);
481 }
482
483}
Note: See TracBrowser for help on using the repository browser.