source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 25967

Last change on this file since 25967 was 25967, checked in by kjdon, 12 years ago

added new doctype pagedhierarchy

  • Property svn:keywords set to Author Date Id Revision
File size: 17.0 KB
RevLine 
[8959]1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
[9874]22import org.greenstone.gsdl3.core.GSException;
[8959]23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
[15326]29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
[9874]30import org.greenstone.gsdl3.util.DBInfo;
[8959]31// XML classes
32import org.w3c.dom.Document;
[25427]33import org.w3c.dom.Element;
[8959]34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
[13124]44import org.apache.log4j.*;
45
[23792]46// Apache Commons
47import org.apache.commons.lang3.*;
48
[25427]49/**
50 * Implements the generic retrieval and classifier services for GS2 collections.
51 *
[21663]52 * @author Katherine Don
53 * @author Michael Dewsnip
[8959]54 */
55
[25427]56public abstract class AbstractGS2DocumentRetrieve extends AbstractDocumentRetrieve
57{
[8959]58
[25427]59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
[13124]60
[25427]61 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
62 protected String index_stem = null;
[9000]63
[25427]64 protected SimpleCollectionDatabase coll_db = null;
[8959]65
/**
 * Creates the service with a GS2 macro resolver installed. The resolver is
 * handed the collection database later, in configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    super();
    this.macro_resolver = new GS2MacroResolver();
}
[10651]71
[25427]72 public void cleanUp()
73 {
74 super.cleanUp();
75 this.coll_db.closeDatabase();
[15326]76 }
[10651]77
[25427]78 /** configure this service */
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info))
82 {
83 return false;
84 }
[15770]85
[25427]86 logger.info("Configuring AbstractGS2DocumentRetrieve...");
87 //this.config_info = info;
[8959]88
[25427]89 // the index stem is either specified in the config file or is the collection name
90 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
91 if (index_stem_elem != null)
92 {
93 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
94 }
95 if (this.index_stem == null || this.index_stem.equals(""))
96 {
97 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
98 this.index_stem = this.cluster_name;
99 }
[8959]100
[25427]101 // find out what kind of database we have
102 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
103 String database_type = null;
104 if (database_type_elem != null)
105 {
106 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
107 }
108 if (database_type == null || database_type.equals(""))
109 {
110 database_type = "gdbm"; // the default
111 }
112 coll_db = new SimpleCollectionDatabase(database_type);
113 if (!coll_db.databaseOK())
114 {
115 logger.error("Couldn't create the collection database of type " + database_type);
116 return false;
117 }
118
119 // Open database for querying
120 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
121 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ))
122 {
123 logger.error("Could not open collection database!");
124 return false;
125 }
126
127 // we need to set the database for our GS2 macro resolver
128 GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver) this.macro_resolver;
129 gs2_macro_resolver.setDB(this.coll_db);
130
131 return true;
[8959]132 }
133
[25427]134 /** if id ends in .fc, .pc etc, then translate it to the correct id */
135 protected String translateId(String node_id)
136 {
137 return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
[8959]138 }
[25427]139
140 /**
141 * if an id is not a greenstone id (an external id) then translate it to a
142 * greenstone one
143 */
144 protected String translateExternalId(String node_id)
145 {
146 return this.coll_db.externalId2OID(node_id);
[8959]147 }
148
[25427]149 /**
150 * returns the id of the root node of the document containing node node_id.
151 * . may be the same as node_id
152 */
153 protected String getRootId(String node_id)
154 {
155 return OID.getTop(node_id);
[8959]156 }
157
[25427]158 /** returns a list of the child ids in order, null if no children */
[25635]159 protected ArrayList<String> getChildrenIds(String node_id)
[25427]160 {
161 DBInfo info = this.coll_db.getInfo(node_id);
162 if (info == null)
163 {
164 return null;
[10334]165 }
[8959]166
[25427]167 String contains = info.getInfo("contains");
168 if (contains.equals(""))
169 {
170 return null;
171 }
[25635]172 ArrayList<String> children = new ArrayList<String>();
[25427]173 StringTokenizer st = new StringTokenizer(contains, ";");
174 while (st.hasMoreTokens())
175 {
176 String child_id = StringUtils.replace(st.nextToken(), "\"", node_id);
177 children.add(child_id);
178 }
179 return children;
180
[8959]181 }
182
[25427]183 /** returns the node id of the parent node, null if no parent */
184 protected String getParentId(String node_id)
185 {
186 String parent = OID.getParent(node_id);
187 if (parent.equals(node_id))
188 {
189 return null;
[8959]190 }
[25427]191 return parent;
[8959]192 }
193
[25427]194 /**
195 * get the metadata for the classifier node node_id returns a metadataList
196 * element: <metadataList><metadata
197 * name="xxx">value</metadata></metadataList>
198 */
199 // assumes only one value per metadata
[25635]200 protected Element getMetadataList(String node_id, boolean all_metadata, ArrayList<String> metadata_names) throws GSException
[25427]201 {
202 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
203 DBInfo info = this.coll_db.getInfo(node_id);
204 if (info == null)
205 {
206 return null;
207 }
208 String lang = "en"; // why do we need this??
209 if (all_metadata)
210 {
211 // return everything out of the database
[25635]212 Set<String> keys = info.getKeys();
213 Iterator<String> it = keys.iterator();
[25427]214 while (it.hasNext())
215 {
[25635]216 String key = it.next();
[25427]217 //String value = info.getInfo(key);
[25635]218 Vector<String> values = info.getMultiInfo(key);
[25427]219 for (int i = 0; i < values.size(); i++)
220 {
[25635]221 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
[25427]222 }
223 }
[8959]224
[25427]225 }
226 else
227 {
228 for (int i = 0; i < metadata_names.size(); i++)
229 {
[25635]230 String meta_name = metadata_names.get(i);
[25427]231 String value = getMetadata(node_id, info, meta_name, lang);
232 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
233 }
234 }
235 return metadata_list;
[8959]236 }
237
[25427]238 /**
239 * returns the structural information asked for. info_type may be one of
240 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
241 */
242 protected String getStructureInfo(String doc_id, String info_type)
243 {
244 String value = "";
245 if (info_type.equals(INFO_NUM_SIBS))
246 {
247 String parent_id = OID.getParent(doc_id);
248 if (parent_id.equals(doc_id))
249 {
250 value = "0";
251 }
252 else
253 {
254 value = String.valueOf(getNumChildren(parent_id));
255 }
256 return value;
257 }
[8959]258
[25427]259 if (info_type.equals(INFO_NUM_CHILDREN))
260 {
261 return String.valueOf(getNumChildren(doc_id));
262 }
[8959]263
[25427]264 if (info_type.equals(INFO_SIB_POS))
265 {
266 String parent_id = OID.getParent(doc_id);
267 if (parent_id.equals(doc_id))
268 {
269 return "-1";
270 }
[8959]271
[25427]272 DBInfo info = this.coll_db.getInfo(parent_id);
273 if (info == null)
274 {
275 return "-1";
276 }
[8959]277
[25427]278 String contains = info.getInfo("contains");
279 contains = StringUtils.replace(contains, "\"", parent_id);
280 String[] children = contains.split(";");
281 for (int i = 0; i < children.length; i++)
282 {
283 String child_id = children[i];
284 if (child_id.equals(doc_id))
285 {
286 return String.valueOf(i + 1); // make it from 1 to length
287
288 }
289 }
290
291 return "-1";
292 }
[25818]293 if (info_type.equals(INFO_DOC_TYPE))
294
[25427]295 {
[25818]296 return getDocType(doc_id);
[25427]297 }
[25818]298 return null;
[8959]299 }
[14035]300
[25427]301 protected int getNumChildren(String node_id)
302 {
303 DBInfo info = this.coll_db.getInfo(node_id);
304 if (info == null)
305 {
306 return 0;
307 }
308 String contains = info.getInfo("contains");
309 if (contains.equals(""))
310 {
311 return 0;
312 }
313 String[] children = contains.split(";");
314 return children.length;
[8959]315 }
316
[25427]317 /**
318 * returns the document type of the doc that the specified node belongs to.
319 * should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED,
320 * GSXML.DOC_TYPE_HIERARCHY
321 */
322 protected String getDocType(String node_id)
323 {
324 DBInfo info = this.coll_db.getInfo(node_id);
325 if (info == null)
326 {
327 return GSXML.DOC_TYPE_SIMPLE;
328 }
329 String doc_type = info.getInfo("doctype");
330 if (!doc_type.equals("") && !doc_type.equals("doc"))
331 {
332 return doc_type;
333 }
[8959]334
[25427]335 String top_id = OID.getTop(node_id);
336 boolean is_top = (top_id.equals(node_id) ? true : false);
337
338 String children = info.getInfo("contains");
339 boolean is_leaf = (children.equals("") ? true : false);
340
341 if (is_top && is_leaf)
342 { // a single section document
343 return GSXML.DOC_TYPE_SIMPLE;
[8959]344 }
[25427]345
346 // now we just check the top node
347 if (!is_top)
348 { // we need to look at the top info
349 info = this.coll_db.getInfo(top_id);
350 }
351 if (info == null)
352 {
353 return GSXML.DOC_TYPE_HIERARCHY;
354 }
355
356 String childtype = info.getInfo("childtype");
357 if (childtype.equals("Paged"))
358 {
359 return GSXML.DOC_TYPE_PAGED;
360 }
[25967]361 if (childtype.equals("PagedHierarchy"))
362 {
363 return GSXML.DOC_TYPE_PAGED_HIERARCHY;
364 }
[25427]365 return GSXML.DOC_TYPE_HIERARCHY;
[8959]366 }
[25427]367
368 /**
369 * returns the content of a node should return a nodeContent element:
370 * <nodeContent>text content or other elements</nodeContent>
371 */
372 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
373
374 protected String getMetadata(String node_id, DBInfo info, String metadata, String lang)
375 {
[25805]376 String pos = "";
[25427]377 String relation = "";
378 String separator = ", ";
[25805]379 int index = metadata.indexOf(GSConstants.META_RELATION_SEP);
380 if (index == -1)
[25427]381 {
[25635]382 Vector<String> values = info.getMultiInfo(metadata);
[25427]383 if (values != null)
384 {
385 // just a plain meta entry eg dc.Title
386 StringBuffer result = new StringBuffer();
387 boolean first = true;
388 for (int i = 0; i < values.size(); i++)
389 {
390 if (first)
391 {
392 first = false;
393 }
394 else
395 {
396 result.append(separator);
397 }
[25635]398 result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
[25427]399 }
400 return result.toString();
401 }
402 else
403 {
404 String result = info.getInfo(metadata);
405 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
406 }
[8959]407 }
408
[25805]409 String temp = metadata.substring(0, index);
410 metadata = metadata.substring(index + 1);
411 // check for pos on the front, indicating which piece of meta the user wants
412 // pos can be "first", "last" or the position value of the requested piece of metadata
413 if (temp.startsWith(GSConstants.META_POS))
[25427]414 {
[25805]415 temp = temp.substring(GSConstants.META_POS.length());
416 pos = temp;
417
418 index = metadata.indexOf(GSConstants.META_RELATION_SEP);
419 if (index == -1)
[25427]420 {
421 temp = "";
422 }
423 else
424 {
[25805]425 temp = metadata.substring(0, index);
426 metadata = metadata.substring(index + 1);
[25427]427 }
428 }
[8959]429
[25427]430 // now check for relational info
[25805]431 if (temp.equals("parent") || temp.equals("root") || temp.equals("ancestors")
432 || temp.equals("siblings") || temp.equals("children") || temp.equals("descendants"))
[25427]433 { // "current" "siblings" "children" "descendants"
[25805]434 // gets all siblings by default
[25427]435 relation = temp;
[25805]436 index = metadata.indexOf(GSConstants.META_RELATION_SEP);
437 if (index == -1)
[25427]438 {
439 temp = "";
440 }
441 else
442 {
[25805]443 temp = metadata.substring(0, index);
444 metadata = metadata.substring(index + 1);
[25427]445 }
446 }
[8959]447
[25427]448 // now look for separator info
449 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP))
450 {
451 separator = temp.substring(1, temp.length() - 1);
452
453 }
454
455 String relation_id = node_id;
456 if (relation.equals("parent") || relation.equals("ancestors"))
457 {
458 relation_id = OID.getParent(node_id);
459 // parent or ancestor does not include self
460 if (relation_id.equals(node_id))
461 {
462 return "";
[8959]463 }
464 }
[25427]465 else if (relation.equals("root"))
466 {
467 relation_id = OID.getTop(node_id);
468 }
469
470 // now we either have a single node, or we have ancestors
471 DBInfo relation_info;
472 if (relation_id.equals(node_id))
473 {
474 relation_info = info;
475 }
476 else
477 {
478 relation_info = this.coll_db.getInfo(relation_id);
479 }
480 if (relation_info == null)
481 {
482 return "";
483 }
484
485 StringBuffer result = new StringBuffer();
[25805]486
487 Vector<String> values = relation_info.getMultiInfo(metadata);
[25427]488
[25805]489 if (!pos.equals("")) // if a particular position was specified, so not multiple values for the metadata
[25427]490 {
[25805]491 String meta = "";
492 if (values != null) {
493 if(pos.equals(GSConstants.META_FIRST)) {
494 meta = values.firstElement();
495 } else if(pos.equals(GSConstants.META_LAST)) {
496 meta = values.lastElement();
497 } else {
498 int position = Integer.parseInt(pos);
499 if(position < values.size()) {
500 meta = values.elementAt(position);
501 }
502 }
503 } // else ""
504
505 result.append(this.macro_resolver.resolve(meta, lang, MacroResolver.SCOPE_META, relation_id));
[25427]506 }
507 else
508 {
509 if (values != null)
510 {
511 boolean first = true;
512 for (int i = 0; i < values.size(); i++)
513 {
514 if (first)
515 {
516 first = false;
517 }
518 else
519 {
520 result.append(separator);
521 }
[25635]522 result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
[25427]523 }
524 }
525 logger.info(result);
526 }
527 // if not ancestors, then this is all we do
528 if (!relation.equals("ancestors"))
529 {
530 return result.toString();
531 }
532
533 // now do the ancestors
534 String current_id = relation_id;
535 relation_id = OID.getParent(current_id);
536 while (!relation_id.equals(current_id))
537 {
538 relation_info = this.coll_db.getInfo(relation_id);
539 if (relation_info == null)
540 return result.toString();
[25805]541
542 values = relation_info.getMultiInfo(metadata);
543 if (!pos.equals("")) // if a particular position was specified, so not multiple values for the metadata
[25427]544 {
[25805]545 String meta = "";
546 if (values != null) {
547 if(pos.equals(GSConstants.META_FIRST)) {
548 meta = values.firstElement();
549 } else if(pos.equals(GSConstants.META_LAST)) {
550 meta = values.lastElement();
551 } else {
552 int position = Integer.parseInt(pos);
553 if(position < values.size()) {
554 meta = values.elementAt(position);
555 }
556 }
557 } // else ""
558
[25427]559 result.insert(0, separator);
[25805]560 result.insert(0, this.macro_resolver.resolve(meta, lang, MacroResolver.SCOPE_META, relation_id));
[25427]561 }
562 else
563 {
564 if (values != null)
565 {
566 for (int i = values.size() - 1; i >= 0; i--)
567 {
568 result.insert(0, separator);
[25635]569 result.insert(0, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
[25427]570 }
571 }
572
573 }
574 current_id = relation_id;
575 relation_id = OID.getParent(current_id);
576 }
577 return result.toString();
[8959]578 }
579
[25427]580 /**
581 * needs to get info from collection database - if the calling code gets it
582 * already it may pay to pass it in instead
583 */
584 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
585 {
586 // resolve any collection specific macros
587 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
588 return doc_content;
589 }
[14527]590
[25427]591 protected Element getInfo(String doc_id, String info_type)
592 {
593
594 String value = "";
595 if (info_type.equals(INFO_NUM_SIBS))
596 {
597 String parent_id = OID.getParent(doc_id);
598 if (parent_id.equals(doc_id))
599 {
600 value = "0";
601 }
602 else
603 {
604 value = String.valueOf(getNumChildren(parent_id));
605 }
606 }
607 else if (info_type.equals(INFO_NUM_CHILDREN))
608 {
609 value = String.valueOf(getNumChildren(doc_id));
610 }
611 else if (info_type.equals(INFO_SIB_POS))
612 {
613 String parent_id = OID.getParent(doc_id);
614 if (parent_id.equals(doc_id))
615 {
616 value = "-1";
617 }
618 else
619 {
620 DBInfo info = this.coll_db.getInfo(parent_id);
621 if (info == null)
622 {
623 value = "-1";
624 }
625 else
626 {
627 String contains = info.getInfo("contains");
628 contains = StringUtils.replace(contains, "\"", parent_id);
629 String[] children = contains.split(";");
630 for (int i = 0; i < children.length; i++)
631 {
632 String child_id = children[i];
633 if (child_id.equals(doc_id))
634 {
635 value = String.valueOf(i + 1); // make it from 1 to length
636 break;
637 }
638 }
639 }
640 }
641 }
642 else
643 {
644 return null;
645 }
646 Element info_elem = this.doc.createElement("info");
647 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
648 info_elem.setAttribute(GSXML.VALUE_ATT, value);
649 return info_elem;
650 }
651
652 protected String getHrefOID(String href_url)
653 {
654 return this.coll_db.docnum2OID(href_url);
655 }
656
657}
Note: See TracBrowser for help on using the repository browser.