source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 25818

Last change on this file since 25818 was 25818, checked in by kjdon, 12 years ago

new structure info type - doctype, to return the doc type of a doc id

  • Property svn:keywords set to Author Date Id Revision
File size: 16.9 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46// Apache Commons
47import org.apache.commons.lang3.*;
48
49/**
50 * Implements the generic retrieval and classifier services for GS2 collections.
51 *
52 * @author Katherine Don
53 * @author Michael Dewsnip
54 */
55
56public abstract class AbstractGS2DocumentRetrieve extends AbstractDocumentRetrieve
57{
58
59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
60
61 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
62 protected String index_stem = null;
63
64 protected SimpleCollectionDatabase coll_db = null;
65
/**
 * Creates the service and installs a GS2-specific macro resolver. The
 * resolver is handed the collection database later, in configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    GS2MacroResolver gs2_resolver = new GS2MacroResolver();
    this.macro_resolver = gs2_resolver;
}
71
72 public void cleanUp()
73 {
74 super.cleanUp();
75 this.coll_db.closeDatabase();
76 }
77
78 /** configure this service */
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info))
82 {
83 return false;
84 }
85
86 logger.info("Configuring AbstractGS2DocumentRetrieve...");
87 //this.config_info = info;
88
89 // the index stem is either specified in the config file or is the collection name
90 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
91 if (index_stem_elem != null)
92 {
93 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
94 }
95 if (this.index_stem == null || this.index_stem.equals(""))
96 {
97 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
98 this.index_stem = this.cluster_name;
99 }
100
101 // find out what kind of database we have
102 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
103 String database_type = null;
104 if (database_type_elem != null)
105 {
106 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
107 }
108 if (database_type == null || database_type.equals(""))
109 {
110 database_type = "gdbm"; // the default
111 }
112 coll_db = new SimpleCollectionDatabase(database_type);
113 if (!coll_db.databaseOK())
114 {
115 logger.error("Couldn't create the collection database of type " + database_type);
116 return false;
117 }
118
119 // Open database for querying
120 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
121 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ))
122 {
123 logger.error("Could not open collection database!");
124 return false;
125 }
126
127 // we need to set the database for our GS2 macro resolver
128 GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver) this.macro_resolver;
129 gs2_macro_resolver.setDB(this.coll_db);
130
131 return true;
132 }
133
134 /** if id ends in .fc, .pc etc, then translate it to the correct id */
135 protected String translateId(String node_id)
136 {
137 return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
138 }
139
140 /**
141 * if an id is not a greenstone id (an external id) then translate it to a
142 * greenstone one
143 */
144 protected String translateExternalId(String node_id)
145 {
146 return this.coll_db.externalId2OID(node_id);
147 }
148
149 /**
150 * returns the id of the root node of the document containing node node_id.
151 * . may be the same as node_id
152 */
153 protected String getRootId(String node_id)
154 {
155 return OID.getTop(node_id);
156 }
157
158 /** returns a list of the child ids in order, null if no children */
159 protected ArrayList<String> getChildrenIds(String node_id)
160 {
161 DBInfo info = this.coll_db.getInfo(node_id);
162 if (info == null)
163 {
164 return null;
165 }
166
167 String contains = info.getInfo("contains");
168 if (contains.equals(""))
169 {
170 return null;
171 }
172 ArrayList<String> children = new ArrayList<String>();
173 StringTokenizer st = new StringTokenizer(contains, ";");
174 while (st.hasMoreTokens())
175 {
176 String child_id = StringUtils.replace(st.nextToken(), "\"", node_id);
177 children.add(child_id);
178 }
179 return children;
180
181 }
182
183 /** returns the node id of the parent node, null if no parent */
184 protected String getParentId(String node_id)
185 {
186 String parent = OID.getParent(node_id);
187 if (parent.equals(node_id))
188 {
189 return null;
190 }
191 return parent;
192 }
193
194 /**
195 * get the metadata for the classifier node node_id returns a metadataList
196 * element: <metadataList><metadata
197 * name="xxx">value</metadata></metadataList>
198 */
199 // assumes only one value per metadata
200 protected Element getMetadataList(String node_id, boolean all_metadata, ArrayList<String> metadata_names) throws GSException
201 {
202 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
203 DBInfo info = this.coll_db.getInfo(node_id);
204 if (info == null)
205 {
206 return null;
207 }
208 String lang = "en"; // why do we need this??
209 if (all_metadata)
210 {
211 // return everything out of the database
212 Set<String> keys = info.getKeys();
213 Iterator<String> it = keys.iterator();
214 while (it.hasNext())
215 {
216 String key = it.next();
217 //String value = info.getInfo(key);
218 Vector<String> values = info.getMultiInfo(key);
219 for (int i = 0; i < values.size(); i++)
220 {
221 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
222 }
223 }
224
225 }
226 else
227 {
228 for (int i = 0; i < metadata_names.size(); i++)
229 {
230 String meta_name = metadata_names.get(i);
231 String value = getMetadata(node_id, info, meta_name, lang);
232 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
233 }
234 }
235 return metadata_list;
236 }
237
238 /**
239 * returns the structural information asked for. info_type may be one of
240 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
241 */
242 protected String getStructureInfo(String doc_id, String info_type)
243 {
244 String value = "";
245 if (info_type.equals(INFO_NUM_SIBS))
246 {
247 String parent_id = OID.getParent(doc_id);
248 if (parent_id.equals(doc_id))
249 {
250 value = "0";
251 }
252 else
253 {
254 value = String.valueOf(getNumChildren(parent_id));
255 }
256 return value;
257 }
258
259 if (info_type.equals(INFO_NUM_CHILDREN))
260 {
261 return String.valueOf(getNumChildren(doc_id));
262 }
263
264 if (info_type.equals(INFO_SIB_POS))
265 {
266 String parent_id = OID.getParent(doc_id);
267 if (parent_id.equals(doc_id))
268 {
269 return "-1";
270 }
271
272 DBInfo info = this.coll_db.getInfo(parent_id);
273 if (info == null)
274 {
275 return "-1";
276 }
277
278 String contains = info.getInfo("contains");
279 contains = StringUtils.replace(contains, "\"", parent_id);
280 String[] children = contains.split(";");
281 for (int i = 0; i < children.length; i++)
282 {
283 String child_id = children[i];
284 if (child_id.equals(doc_id))
285 {
286 return String.valueOf(i + 1); // make it from 1 to length
287
288 }
289 }
290
291 return "-1";
292 }
293 if (info_type.equals(INFO_DOC_TYPE))
294
295 {
296 return getDocType(doc_id);
297 }
298 return null;
299 }
300
301 protected int getNumChildren(String node_id)
302 {
303 DBInfo info = this.coll_db.getInfo(node_id);
304 if (info == null)
305 {
306 return 0;
307 }
308 String contains = info.getInfo("contains");
309 if (contains.equals(""))
310 {
311 return 0;
312 }
313 String[] children = contains.split(";");
314 return children.length;
315 }
316
317 /**
318 * returns the document type of the doc that the specified node belongs to.
319 * should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED,
320 * GSXML.DOC_TYPE_HIERARCHY
321 */
322 protected String getDocType(String node_id)
323 {
324 DBInfo info = this.coll_db.getInfo(node_id);
325 if (info == null)
326 {
327 return GSXML.DOC_TYPE_SIMPLE;
328 }
329 String doc_type = info.getInfo("doctype");
330 if (!doc_type.equals("") && !doc_type.equals("doc"))
331 {
332 return doc_type;
333 }
334
335 String top_id = OID.getTop(node_id);
336 boolean is_top = (top_id.equals(node_id) ? true : false);
337
338 String children = info.getInfo("contains");
339 boolean is_leaf = (children.equals("") ? true : false);
340
341 if (is_top && is_leaf)
342 { // a single section document
343 return GSXML.DOC_TYPE_SIMPLE;
344 }
345
346 // now we just check the top node
347 if (!is_top)
348 { // we need to look at the top info
349 info = this.coll_db.getInfo(top_id);
350 }
351 if (info == null)
352 {
353 return GSXML.DOC_TYPE_HIERARCHY;
354 }
355
356 String childtype = info.getInfo("childtype");
357 if (childtype.equals("Paged"))
358 {
359 return GSXML.DOC_TYPE_PAGED;
360 }
361 return GSXML.DOC_TYPE_HIERARCHY;
362 }
363
364 /**
365 * returns the content of a node should return a nodeContent element:
366 * <nodeContent>text content or other elements</nodeContent>
367 */
368 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
369
370 protected String getMetadata(String node_id, DBInfo info, String metadata, String lang)
371 {
372 String pos = "";
373 String relation = "";
374 String separator = ", ";
375 int index = metadata.indexOf(GSConstants.META_RELATION_SEP);
376 if (index == -1)
377 {
378 Vector<String> values = info.getMultiInfo(metadata);
379 if (values != null)
380 {
381 // just a plain meta entry eg dc.Title
382 StringBuffer result = new StringBuffer();
383 boolean first = true;
384 for (int i = 0; i < values.size(); i++)
385 {
386 if (first)
387 {
388 first = false;
389 }
390 else
391 {
392 result.append(separator);
393 }
394 result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
395 }
396 return result.toString();
397 }
398 else
399 {
400 String result = info.getInfo(metadata);
401 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
402 }
403 }
404
405 String temp = metadata.substring(0, index);
406 metadata = metadata.substring(index + 1);
407 // check for pos on the front, indicating which piece of meta the user wants
408 // pos can be "first", "last" or the position value of the requested piece of metadata
409 if (temp.startsWith(GSConstants.META_POS))
410 {
411 temp = temp.substring(GSConstants.META_POS.length());
412 pos = temp;
413
414 index = metadata.indexOf(GSConstants.META_RELATION_SEP);
415 if (index == -1)
416 {
417 temp = "";
418 }
419 else
420 {
421 temp = metadata.substring(0, index);
422 metadata = metadata.substring(index + 1);
423 }
424 }
425
426 // now check for relational info
427 if (temp.equals("parent") || temp.equals("root") || temp.equals("ancestors")
428 || temp.equals("siblings") || temp.equals("children") || temp.equals("descendants"))
429 { // "current" "siblings" "children" "descendants"
430 // gets all siblings by default
431 relation = temp;
432 index = metadata.indexOf(GSConstants.META_RELATION_SEP);
433 if (index == -1)
434 {
435 temp = "";
436 }
437 else
438 {
439 temp = metadata.substring(0, index);
440 metadata = metadata.substring(index + 1);
441 }
442 }
443
444 // now look for separator info
445 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP))
446 {
447 separator = temp.substring(1, temp.length() - 1);
448
449 }
450
451 String relation_id = node_id;
452 if (relation.equals("parent") || relation.equals("ancestors"))
453 {
454 relation_id = OID.getParent(node_id);
455 // parent or ancestor does not include self
456 if (relation_id.equals(node_id))
457 {
458 return "";
459 }
460 }
461 else if (relation.equals("root"))
462 {
463 relation_id = OID.getTop(node_id);
464 }
465
466 // now we either have a single node, or we have ancestors
467 DBInfo relation_info;
468 if (relation_id.equals(node_id))
469 {
470 relation_info = info;
471 }
472 else
473 {
474 relation_info = this.coll_db.getInfo(relation_id);
475 }
476 if (relation_info == null)
477 {
478 return "";
479 }
480
481 StringBuffer result = new StringBuffer();
482
483 Vector<String> values = relation_info.getMultiInfo(metadata);
484
485 if (!pos.equals("")) // if a particular position was specified, so not multiple values for the metadata
486 {
487 String meta = "";
488 if (values != null) {
489 if(pos.equals(GSConstants.META_FIRST)) {
490 meta = values.firstElement();
491 } else if(pos.equals(GSConstants.META_LAST)) {
492 meta = values.lastElement();
493 } else {
494 int position = Integer.parseInt(pos);
495 if(position < values.size()) {
496 meta = values.elementAt(position);
497 }
498 }
499 } // else ""
500
501 result.append(this.macro_resolver.resolve(meta, lang, MacroResolver.SCOPE_META, relation_id));
502 }
503 else
504 {
505 if (values != null)
506 {
507 boolean first = true;
508 for (int i = 0; i < values.size(); i++)
509 {
510 if (first)
511 {
512 first = false;
513 }
514 else
515 {
516 result.append(separator);
517 }
518 result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
519 }
520 }
521 logger.info(result);
522 }
523 // if not ancestors, then this is all we do
524 if (!relation.equals("ancestors"))
525 {
526 return result.toString();
527 }
528
529 // now do the ancestors
530 String current_id = relation_id;
531 relation_id = OID.getParent(current_id);
532 while (!relation_id.equals(current_id))
533 {
534 relation_info = this.coll_db.getInfo(relation_id);
535 if (relation_info == null)
536 return result.toString();
537
538 values = relation_info.getMultiInfo(metadata);
539 if (!pos.equals("")) // if a particular position was specified, so not multiple values for the metadata
540 {
541 String meta = "";
542 if (values != null) {
543 if(pos.equals(GSConstants.META_FIRST)) {
544 meta = values.firstElement();
545 } else if(pos.equals(GSConstants.META_LAST)) {
546 meta = values.lastElement();
547 } else {
548 int position = Integer.parseInt(pos);
549 if(position < values.size()) {
550 meta = values.elementAt(position);
551 }
552 }
553 } // else ""
554
555 result.insert(0, separator);
556 result.insert(0, this.macro_resolver.resolve(meta, lang, MacroResolver.SCOPE_META, relation_id));
557 }
558 else
559 {
560 if (values != null)
561 {
562 for (int i = values.size() - 1; i >= 0; i--)
563 {
564 result.insert(0, separator);
565 result.insert(0, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
566 }
567 }
568
569 }
570 current_id = relation_id;
571 relation_id = OID.getParent(current_id);
572 }
573 return result.toString();
574 }
575
576 /**
577 * needs to get info from collection database - if the calling code gets it
578 * already it may pay to pass it in instead
579 */
580 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
581 {
582 // resolve any collection specific macros
583 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
584 return doc_content;
585 }
586
587 protected Element getInfo(String doc_id, String info_type)
588 {
589
590 String value = "";
591 if (info_type.equals(INFO_NUM_SIBS))
592 {
593 String parent_id = OID.getParent(doc_id);
594 if (parent_id.equals(doc_id))
595 {
596 value = "0";
597 }
598 else
599 {
600 value = String.valueOf(getNumChildren(parent_id));
601 }
602 }
603 else if (info_type.equals(INFO_NUM_CHILDREN))
604 {
605 value = String.valueOf(getNumChildren(doc_id));
606 }
607 else if (info_type.equals(INFO_SIB_POS))
608 {
609 String parent_id = OID.getParent(doc_id);
610 if (parent_id.equals(doc_id))
611 {
612 value = "-1";
613 }
614 else
615 {
616 DBInfo info = this.coll_db.getInfo(parent_id);
617 if (info == null)
618 {
619 value = "-1";
620 }
621 else
622 {
623 String contains = info.getInfo("contains");
624 contains = StringUtils.replace(contains, "\"", parent_id);
625 String[] children = contains.split(";");
626 for (int i = 0; i < children.length; i++)
627 {
628 String child_id = children[i];
629 if (child_id.equals(doc_id))
630 {
631 value = String.valueOf(i + 1); // make it from 1 to length
632 break;
633 }
634 }
635 }
636 }
637 }
638 else
639 {
640 return null;
641 }
642 Element info_elem = this.doc.createElement("info");
643 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
644 info_elem.setAttribute(GSXML.VALUE_ATT, value);
645 return info_elem;
646 }
647
648 protected String getHrefOID(String href_url)
649 {
650 return this.coll_db.docnum2OID(href_url);
651 }
652
653}
Note: See TracBrowser for help on using the repository browser.