source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 25967

Last change on this file since 25967 was 25967, checked in by kjdon, 12 years ago

added new doctype pagedhierarchy

  • Property svn:keywords set to Author Date Id Revision
File size: 17.0 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46// Apache Commons
47import org.apache.commons.lang3.*;
48
49/**
50 * Implements the generic retrieval and classifier services for GS2 collections.
51 *
52 * @author Katherine Don
53 * @author Michael Dewsnip
54 */
55
56public abstract class AbstractGS2DocumentRetrieve extends AbstractDocumentRetrieve
57{
58
59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
60
61 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
62 protected String index_stem = null;
63
64 protected SimpleCollectionDatabase coll_db = null;
65
/**
 * Creates the service and installs a GS2 macro resolver. The resolver is
 * handed the collection database later, in configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    macro_resolver = new GS2MacroResolver();
}
71
72 public void cleanUp()
73 {
74 super.cleanUp();
75 this.coll_db.closeDatabase();
76 }
77
78 /** configure this service */
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info))
82 {
83 return false;
84 }
85
86 logger.info("Configuring AbstractGS2DocumentRetrieve...");
87 //this.config_info = info;
88
89 // the index stem is either specified in the config file or is the collection name
90 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
91 if (index_stem_elem != null)
92 {
93 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
94 }
95 if (this.index_stem == null || this.index_stem.equals(""))
96 {
97 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
98 this.index_stem = this.cluster_name;
99 }
100
101 // find out what kind of database we have
102 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
103 String database_type = null;
104 if (database_type_elem != null)
105 {
106 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
107 }
108 if (database_type == null || database_type.equals(""))
109 {
110 database_type = "gdbm"; // the default
111 }
112 coll_db = new SimpleCollectionDatabase(database_type);
113 if (!coll_db.databaseOK())
114 {
115 logger.error("Couldn't create the collection database of type " + database_type);
116 return false;
117 }
118
119 // Open database for querying
120 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
121 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ))
122 {
123 logger.error("Could not open collection database!");
124 return false;
125 }
126
127 // we need to set the database for our GS2 macro resolver
128 GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver) this.macro_resolver;
129 gs2_macro_resolver.setDB(this.coll_db);
130
131 return true;
132 }
133
134 /** if id ends in .fc, .pc etc, then translate it to the correct id */
135 protected String translateId(String node_id)
136 {
137 return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
138 }
139
140 /**
141 * if an id is not a greenstone id (an external id) then translate it to a
142 * greenstone one
143 */
144 protected String translateExternalId(String node_id)
145 {
146 return this.coll_db.externalId2OID(node_id);
147 }
148
149 /**
150 * returns the id of the root node of the document containing node node_id.
151 * . may be the same as node_id
152 */
153 protected String getRootId(String node_id)
154 {
155 return OID.getTop(node_id);
156 }
157
158 /** returns a list of the child ids in order, null if no children */
159 protected ArrayList<String> getChildrenIds(String node_id)
160 {
161 DBInfo info = this.coll_db.getInfo(node_id);
162 if (info == null)
163 {
164 return null;
165 }
166
167 String contains = info.getInfo("contains");
168 if (contains.equals(""))
169 {
170 return null;
171 }
172 ArrayList<String> children = new ArrayList<String>();
173 StringTokenizer st = new StringTokenizer(contains, ";");
174 while (st.hasMoreTokens())
175 {
176 String child_id = StringUtils.replace(st.nextToken(), "\"", node_id);
177 children.add(child_id);
178 }
179 return children;
180
181 }
182
183 /** returns the node id of the parent node, null if no parent */
184 protected String getParentId(String node_id)
185 {
186 String parent = OID.getParent(node_id);
187 if (parent.equals(node_id))
188 {
189 return null;
190 }
191 return parent;
192 }
193
194 /**
195 * get the metadata for the classifier node node_id returns a metadataList
196 * element: <metadataList><metadata
197 * name="xxx">value</metadata></metadataList>
198 */
199 // assumes only one value per metadata
200 protected Element getMetadataList(String node_id, boolean all_metadata, ArrayList<String> metadata_names) throws GSException
201 {
202 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
203 DBInfo info = this.coll_db.getInfo(node_id);
204 if (info == null)
205 {
206 return null;
207 }
208 String lang = "en"; // why do we need this??
209 if (all_metadata)
210 {
211 // return everything out of the database
212 Set<String> keys = info.getKeys();
213 Iterator<String> it = keys.iterator();
214 while (it.hasNext())
215 {
216 String key = it.next();
217 //String value = info.getInfo(key);
218 Vector<String> values = info.getMultiInfo(key);
219 for (int i = 0; i < values.size(); i++)
220 {
221 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
222 }
223 }
224
225 }
226 else
227 {
228 for (int i = 0; i < metadata_names.size(); i++)
229 {
230 String meta_name = metadata_names.get(i);
231 String value = getMetadata(node_id, info, meta_name, lang);
232 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
233 }
234 }
235 return metadata_list;
236 }
237
238 /**
239 * returns the structural information asked for. info_type may be one of
240 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
241 */
242 protected String getStructureInfo(String doc_id, String info_type)
243 {
244 String value = "";
245 if (info_type.equals(INFO_NUM_SIBS))
246 {
247 String parent_id = OID.getParent(doc_id);
248 if (parent_id.equals(doc_id))
249 {
250 value = "0";
251 }
252 else
253 {
254 value = String.valueOf(getNumChildren(parent_id));
255 }
256 return value;
257 }
258
259 if (info_type.equals(INFO_NUM_CHILDREN))
260 {
261 return String.valueOf(getNumChildren(doc_id));
262 }
263
264 if (info_type.equals(INFO_SIB_POS))
265 {
266 String parent_id = OID.getParent(doc_id);
267 if (parent_id.equals(doc_id))
268 {
269 return "-1";
270 }
271
272 DBInfo info = this.coll_db.getInfo(parent_id);
273 if (info == null)
274 {
275 return "-1";
276 }
277
278 String contains = info.getInfo("contains");
279 contains = StringUtils.replace(contains, "\"", parent_id);
280 String[] children = contains.split(";");
281 for (int i = 0; i < children.length; i++)
282 {
283 String child_id = children[i];
284 if (child_id.equals(doc_id))
285 {
286 return String.valueOf(i + 1); // make it from 1 to length
287
288 }
289 }
290
291 return "-1";
292 }
293 if (info_type.equals(INFO_DOC_TYPE))
294
295 {
296 return getDocType(doc_id);
297 }
298 return null;
299 }
300
301 protected int getNumChildren(String node_id)
302 {
303 DBInfo info = this.coll_db.getInfo(node_id);
304 if (info == null)
305 {
306 return 0;
307 }
308 String contains = info.getInfo("contains");
309 if (contains.equals(""))
310 {
311 return 0;
312 }
313 String[] children = contains.split(";");
314 return children.length;
315 }
316
317 /**
318 * returns the document type of the doc that the specified node belongs to.
319 * should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED,
320 * GSXML.DOC_TYPE_HIERARCHY
321 */
322 protected String getDocType(String node_id)
323 {
324 DBInfo info = this.coll_db.getInfo(node_id);
325 if (info == null)
326 {
327 return GSXML.DOC_TYPE_SIMPLE;
328 }
329 String doc_type = info.getInfo("doctype");
330 if (!doc_type.equals("") && !doc_type.equals("doc"))
331 {
332 return doc_type;
333 }
334
335 String top_id = OID.getTop(node_id);
336 boolean is_top = (top_id.equals(node_id) ? true : false);
337
338 String children = info.getInfo("contains");
339 boolean is_leaf = (children.equals("") ? true : false);
340
341 if (is_top && is_leaf)
342 { // a single section document
343 return GSXML.DOC_TYPE_SIMPLE;
344 }
345
346 // now we just check the top node
347 if (!is_top)
348 { // we need to look at the top info
349 info = this.coll_db.getInfo(top_id);
350 }
351 if (info == null)
352 {
353 return GSXML.DOC_TYPE_HIERARCHY;
354 }
355
356 String childtype = info.getInfo("childtype");
357 if (childtype.equals("Paged"))
358 {
359 return GSXML.DOC_TYPE_PAGED;
360 }
361 if (childtype.equals("PagedHierarchy"))
362 {
363 return GSXML.DOC_TYPE_PAGED_HIERARCHY;
364 }
365 return GSXML.DOC_TYPE_HIERARCHY;
366 }
367
/**
 * Returns the content of a node. Implementations should return a
 * nodeContent element:
 * <nodeContent>text content or other elements</nodeContent>
 *
 * @param doc_id the id of the node whose content is wanted
 * @param lang language of the request (presumably a language code such as
 *        "en"; confirm against callers)
 * @throws GSException declared for implementations to signal retrieval
 *         failures
 */
abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
373
374 protected String getMetadata(String node_id, DBInfo info, String metadata, String lang)
375 {
376 String pos = "";
377 String relation = "";
378 String separator = ", ";
379 int index = metadata.indexOf(GSConstants.META_RELATION_SEP);
380 if (index == -1)
381 {
382 Vector<String> values = info.getMultiInfo(metadata);
383 if (values != null)
384 {
385 // just a plain meta entry eg dc.Title
386 StringBuffer result = new StringBuffer();
387 boolean first = true;
388 for (int i = 0; i < values.size(); i++)
389 {
390 if (first)
391 {
392 first = false;
393 }
394 else
395 {
396 result.append(separator);
397 }
398 result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
399 }
400 return result.toString();
401 }
402 else
403 {
404 String result = info.getInfo(metadata);
405 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
406 }
407 }
408
409 String temp = metadata.substring(0, index);
410 metadata = metadata.substring(index + 1);
411 // check for pos on the front, indicating which piece of meta the user wants
412 // pos can be "first", "last" or the position value of the requested piece of metadata
413 if (temp.startsWith(GSConstants.META_POS))
414 {
415 temp = temp.substring(GSConstants.META_POS.length());
416 pos = temp;
417
418 index = metadata.indexOf(GSConstants.META_RELATION_SEP);
419 if (index == -1)
420 {
421 temp = "";
422 }
423 else
424 {
425 temp = metadata.substring(0, index);
426 metadata = metadata.substring(index + 1);
427 }
428 }
429
430 // now check for relational info
431 if (temp.equals("parent") || temp.equals("root") || temp.equals("ancestors")
432 || temp.equals("siblings") || temp.equals("children") || temp.equals("descendants"))
433 { // "current" "siblings" "children" "descendants"
434 // gets all siblings by default
435 relation = temp;
436 index = metadata.indexOf(GSConstants.META_RELATION_SEP);
437 if (index == -1)
438 {
439 temp = "";
440 }
441 else
442 {
443 temp = metadata.substring(0, index);
444 metadata = metadata.substring(index + 1);
445 }
446 }
447
448 // now look for separator info
449 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP))
450 {
451 separator = temp.substring(1, temp.length() - 1);
452
453 }
454
455 String relation_id = node_id;
456 if (relation.equals("parent") || relation.equals("ancestors"))
457 {
458 relation_id = OID.getParent(node_id);
459 // parent or ancestor does not include self
460 if (relation_id.equals(node_id))
461 {
462 return "";
463 }
464 }
465 else if (relation.equals("root"))
466 {
467 relation_id = OID.getTop(node_id);
468 }
469
470 // now we either have a single node, or we have ancestors
471 DBInfo relation_info;
472 if (relation_id.equals(node_id))
473 {
474 relation_info = info;
475 }
476 else
477 {
478 relation_info = this.coll_db.getInfo(relation_id);
479 }
480 if (relation_info == null)
481 {
482 return "";
483 }
484
485 StringBuffer result = new StringBuffer();
486
487 Vector<String> values = relation_info.getMultiInfo(metadata);
488
489 if (!pos.equals("")) // if a particular position was specified, so not multiple values for the metadata
490 {
491 String meta = "";
492 if (values != null) {
493 if(pos.equals(GSConstants.META_FIRST)) {
494 meta = values.firstElement();
495 } else if(pos.equals(GSConstants.META_LAST)) {
496 meta = values.lastElement();
497 } else {
498 int position = Integer.parseInt(pos);
499 if(position < values.size()) {
500 meta = values.elementAt(position);
501 }
502 }
503 } // else ""
504
505 result.append(this.macro_resolver.resolve(meta, lang, MacroResolver.SCOPE_META, relation_id));
506 }
507 else
508 {
509 if (values != null)
510 {
511 boolean first = true;
512 for (int i = 0; i < values.size(); i++)
513 {
514 if (first)
515 {
516 first = false;
517 }
518 else
519 {
520 result.append(separator);
521 }
522 result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
523 }
524 }
525 logger.info(result);
526 }
527 // if not ancestors, then this is all we do
528 if (!relation.equals("ancestors"))
529 {
530 return result.toString();
531 }
532
533 // now do the ancestors
534 String current_id = relation_id;
535 relation_id = OID.getParent(current_id);
536 while (!relation_id.equals(current_id))
537 {
538 relation_info = this.coll_db.getInfo(relation_id);
539 if (relation_info == null)
540 return result.toString();
541
542 values = relation_info.getMultiInfo(metadata);
543 if (!pos.equals("")) // if a particular position was specified, so not multiple values for the metadata
544 {
545 String meta = "";
546 if (values != null) {
547 if(pos.equals(GSConstants.META_FIRST)) {
548 meta = values.firstElement();
549 } else if(pos.equals(GSConstants.META_LAST)) {
550 meta = values.lastElement();
551 } else {
552 int position = Integer.parseInt(pos);
553 if(position < values.size()) {
554 meta = values.elementAt(position);
555 }
556 }
557 } // else ""
558
559 result.insert(0, separator);
560 result.insert(0, this.macro_resolver.resolve(meta, lang, MacroResolver.SCOPE_META, relation_id));
561 }
562 else
563 {
564 if (values != null)
565 {
566 for (int i = values.size() - 1; i >= 0; i--)
567 {
568 result.insert(0, separator);
569 result.insert(0, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
570 }
571 }
572
573 }
574 current_id = relation_id;
575 relation_id = OID.getParent(current_id);
576 }
577 return result.toString();
578 }
579
580 /**
581 * needs to get info from collection database - if the calling code gets it
582 * already it may pay to pass it in instead
583 */
584 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
585 {
586 // resolve any collection specific macros
587 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
588 return doc_content;
589 }
590
591 protected Element getInfo(String doc_id, String info_type)
592 {
593
594 String value = "";
595 if (info_type.equals(INFO_NUM_SIBS))
596 {
597 String parent_id = OID.getParent(doc_id);
598 if (parent_id.equals(doc_id))
599 {
600 value = "0";
601 }
602 else
603 {
604 value = String.valueOf(getNumChildren(parent_id));
605 }
606 }
607 else if (info_type.equals(INFO_NUM_CHILDREN))
608 {
609 value = String.valueOf(getNumChildren(doc_id));
610 }
611 else if (info_type.equals(INFO_SIB_POS))
612 {
613 String parent_id = OID.getParent(doc_id);
614 if (parent_id.equals(doc_id))
615 {
616 value = "-1";
617 }
618 else
619 {
620 DBInfo info = this.coll_db.getInfo(parent_id);
621 if (info == null)
622 {
623 value = "-1";
624 }
625 else
626 {
627 String contains = info.getInfo("contains");
628 contains = StringUtils.replace(contains, "\"", parent_id);
629 String[] children = contains.split(";");
630 for (int i = 0; i < children.length; i++)
631 {
632 String child_id = children[i];
633 if (child_id.equals(doc_id))
634 {
635 value = String.valueOf(i + 1); // make it from 1 to length
636 break;
637 }
638 }
639 }
640 }
641 }
642 else
643 {
644 return null;
645 }
646 Element info_elem = this.doc.createElement("info");
647 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
648 info_elem.setAttribute(GSXML.VALUE_ATT, value);
649 return info_elem;
650 }
651
652 protected String getHrefOID(String href_url)
653 {
654 return this.coll_db.docnum2OID(href_url);
655 }
656
657}
Note: See TracBrowser for help on using the repository browser.