source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 25805

Last change on this file since 25805 was 25805, checked in by ak19, 12 years ago

Asking for a piece of metadata like dc.Creator returns all values for dc.Creator. So multiple=true is always the case by default. No multiple=false was defined. Moreover, there was no way of getting a single value, which was the default in GS2 and which returned the first value for the requested metadata. Now multiple is no longer used, as all values are (still) returned by default. Instead the pos attribute has been introduced, which can be the terms first or last, or else it can be a number representing which value for that metadata needs to be returned.

  • Property svn:keywords set to Author Date Id Revision
File size: 16.8 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46// Apache Commons
47import org.apache.commons.lang3.*;
48
49/**
50 * Implements the generic retrieval and classifier services for GS2 collections.
51 *
52 * @author Katherine Don
53 * @author Michael Dewsnip
54 */
55
56public abstract class AbstractGS2DocumentRetrieve extends AbstractDocumentRetrieve
57{
58
59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
60
61 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
62 protected String index_stem = null;
63
64 protected SimpleCollectionDatabase coll_db = null;
65
/**
 * Installs a GS2-specific macro resolver. configure() later hands the
 * collection database to this resolver.
 */
protected AbstractGS2DocumentRetrieve()
{
    GS2MacroResolver gs2_resolver = new GS2MacroResolver();
    this.macro_resolver = gs2_resolver;
}
71
72 public void cleanUp()
73 {
74 super.cleanUp();
75 this.coll_db.closeDatabase();
76 }
77
78 /** configure this service */
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info))
82 {
83 return false;
84 }
85
86 logger.info("Configuring AbstractGS2DocumentRetrieve...");
87 //this.config_info = info;
88
89 // the index stem is either specified in the config file or is the collection name
90 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
91 if (index_stem_elem != null)
92 {
93 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
94 }
95 if (this.index_stem == null || this.index_stem.equals(""))
96 {
97 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
98 this.index_stem = this.cluster_name;
99 }
100
101 // find out what kind of database we have
102 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
103 String database_type = null;
104 if (database_type_elem != null)
105 {
106 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
107 }
108 if (database_type == null || database_type.equals(""))
109 {
110 database_type = "gdbm"; // the default
111 }
112 coll_db = new SimpleCollectionDatabase(database_type);
113 if (!coll_db.databaseOK())
114 {
115 logger.error("Couldn't create the collection database of type " + database_type);
116 return false;
117 }
118
119 // Open database for querying
120 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
121 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ))
122 {
123 logger.error("Could not open collection database!");
124 return false;
125 }
126
127 // we need to set the database for our GS2 macro resolver
128 GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver) this.macro_resolver;
129 gs2_macro_resolver.setDB(this.coll_db);
130
131 return true;
132 }
133
134 /** if id ends in .fc, .pc etc, then translate it to the correct id */
135 protected String translateId(String node_id)
136 {
137 return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
138 }
139
140 /**
141 * if an id is not a greenstone id (an external id) then translate it to a
142 * greenstone one
143 */
144 protected String translateExternalId(String node_id)
145 {
146 return this.coll_db.externalId2OID(node_id);
147 }
148
149 /**
150 * returns the id of the root node of the document containing node node_id.
151 * . may be the same as node_id
152 */
153 protected String getRootId(String node_id)
154 {
155 return OID.getTop(node_id);
156 }
157
158 /** returns a list of the child ids in order, null if no children */
159 protected ArrayList<String> getChildrenIds(String node_id)
160 {
161 DBInfo info = this.coll_db.getInfo(node_id);
162 if (info == null)
163 {
164 return null;
165 }
166
167 String contains = info.getInfo("contains");
168 if (contains.equals(""))
169 {
170 return null;
171 }
172 ArrayList<String> children = new ArrayList<String>();
173 StringTokenizer st = new StringTokenizer(contains, ";");
174 while (st.hasMoreTokens())
175 {
176 String child_id = StringUtils.replace(st.nextToken(), "\"", node_id);
177 children.add(child_id);
178 }
179 return children;
180
181 }
182
183 /** returns the node id of the parent node, null if no parent */
184 protected String getParentId(String node_id)
185 {
186 String parent = OID.getParent(node_id);
187 if (parent.equals(node_id))
188 {
189 return null;
190 }
191 return parent;
192 }
193
194 /**
195 * get the metadata for the classifier node node_id returns a metadataList
196 * element: <metadataList><metadata
197 * name="xxx">value</metadata></metadataList>
198 */
199 // assumes only one value per metadata
200 protected Element getMetadataList(String node_id, boolean all_metadata, ArrayList<String> metadata_names) throws GSException
201 {
202 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
203 DBInfo info = this.coll_db.getInfo(node_id);
204 if (info == null)
205 {
206 return null;
207 }
208 String lang = "en"; // why do we need this??
209 if (all_metadata)
210 {
211 // return everything out of the database
212 Set<String> keys = info.getKeys();
213 Iterator<String> it = keys.iterator();
214 while (it.hasNext())
215 {
216 String key = it.next();
217 //String value = info.getInfo(key);
218 Vector<String> values = info.getMultiInfo(key);
219 for (int i = 0; i < values.size(); i++)
220 {
221 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
222 }
223 }
224
225 }
226 else
227 {
228 for (int i = 0; i < metadata_names.size(); i++)
229 {
230 String meta_name = metadata_names.get(i);
231 String value = getMetadata(node_id, info, meta_name, lang);
232 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
233 }
234 }
235 return metadata_list;
236 }
237
238 /**
239 * returns the structural information asked for. info_type may be one of
240 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
241 */
242 protected String getStructureInfo(String doc_id, String info_type)
243 {
244 String value = "";
245 if (info_type.equals(INFO_NUM_SIBS))
246 {
247 String parent_id = OID.getParent(doc_id);
248 if (parent_id.equals(doc_id))
249 {
250 value = "0";
251 }
252 else
253 {
254 value = String.valueOf(getNumChildren(parent_id));
255 }
256 return value;
257 }
258
259 if (info_type.equals(INFO_NUM_CHILDREN))
260 {
261 return String.valueOf(getNumChildren(doc_id));
262 }
263
264 if (info_type.equals(INFO_SIB_POS))
265 {
266 String parent_id = OID.getParent(doc_id);
267 if (parent_id.equals(doc_id))
268 {
269 return "-1";
270 }
271
272 DBInfo info = this.coll_db.getInfo(parent_id);
273 if (info == null)
274 {
275 return "-1";
276 }
277
278 String contains = info.getInfo("contains");
279 contains = StringUtils.replace(contains, "\"", parent_id);
280 String[] children = contains.split(";");
281 for (int i = 0; i < children.length; i++)
282 {
283 String child_id = children[i];
284 if (child_id.equals(doc_id))
285 {
286 return String.valueOf(i + 1); // make it from 1 to length
287
288 }
289 }
290
291 return "-1";
292 }
293 else
294 {
295 return null;
296 }
297
298 }
299
300 protected int getNumChildren(String node_id)
301 {
302 DBInfo info = this.coll_db.getInfo(node_id);
303 if (info == null)
304 {
305 return 0;
306 }
307 String contains = info.getInfo("contains");
308 if (contains.equals(""))
309 {
310 return 0;
311 }
312 String[] children = contains.split(";");
313 return children.length;
314 }
315
316 /**
317 * returns the document type of the doc that the specified node belongs to.
318 * should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED,
319 * GSXML.DOC_TYPE_HIERARCHY
320 */
321 protected String getDocType(String node_id)
322 {
323 DBInfo info = this.coll_db.getInfo(node_id);
324 if (info == null)
325 {
326 return GSXML.DOC_TYPE_SIMPLE;
327 }
328 String doc_type = info.getInfo("doctype");
329 if (!doc_type.equals("") && !doc_type.equals("doc"))
330 {
331 return doc_type;
332 }
333
334 String top_id = OID.getTop(node_id);
335 boolean is_top = (top_id.equals(node_id) ? true : false);
336
337 String children = info.getInfo("contains");
338 boolean is_leaf = (children.equals("") ? true : false);
339
340 if (is_top && is_leaf)
341 { // a single section document
342 return GSXML.DOC_TYPE_SIMPLE;
343 }
344
345 // now we just check the top node
346 if (!is_top)
347 { // we need to look at the top info
348 info = this.coll_db.getInfo(top_id);
349 }
350 if (info == null)
351 {
352 return GSXML.DOC_TYPE_HIERARCHY;
353 }
354
355 String childtype = info.getInfo("childtype");
356 if (childtype.equals("Paged"))
357 {
358 return GSXML.DOC_TYPE_PAGED;
359 }
360 return GSXML.DOC_TYPE_HIERARCHY;
361 }
362
363 /**
364 * returns the content of a node should return a nodeContent element:
365 * <nodeContent>text content or other elements</nodeContent>
366 */
367 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
368
369 protected String getMetadata(String node_id, DBInfo info, String metadata, String lang)
370 {
371 String pos = "";
372 String relation = "";
373 String separator = ", ";
374 int index = metadata.indexOf(GSConstants.META_RELATION_SEP);
375 if (index == -1)
376 {
377 Vector<String> values = info.getMultiInfo(metadata);
378 if (values != null)
379 {
380 // just a plain meta entry eg dc.Title
381 StringBuffer result = new StringBuffer();
382 boolean first = true;
383 for (int i = 0; i < values.size(); i++)
384 {
385 if (first)
386 {
387 first = false;
388 }
389 else
390 {
391 result.append(separator);
392 }
393 result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
394 }
395 return result.toString();
396 }
397 else
398 {
399 String result = info.getInfo(metadata);
400 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
401 }
402 }
403
404 String temp = metadata.substring(0, index);
405 metadata = metadata.substring(index + 1);
406 // check for pos on the front, indicating which piece of meta the user wants
407 // pos can be "first", "last" or the position value of the requested piece of metadata
408 if (temp.startsWith(GSConstants.META_POS))
409 {
410 temp = temp.substring(GSConstants.META_POS.length());
411 pos = temp;
412
413 index = metadata.indexOf(GSConstants.META_RELATION_SEP);
414 if (index == -1)
415 {
416 temp = "";
417 }
418 else
419 {
420 temp = metadata.substring(0, index);
421 metadata = metadata.substring(index + 1);
422 }
423 }
424
425 // now check for relational info
426 if (temp.equals("parent") || temp.equals("root") || temp.equals("ancestors")
427 || temp.equals("siblings") || temp.equals("children") || temp.equals("descendants"))
428 { // "current" "siblings" "children" "descendants"
429 // gets all siblings by default
430 relation = temp;
431 index = metadata.indexOf(GSConstants.META_RELATION_SEP);
432 if (index == -1)
433 {
434 temp = "";
435 }
436 else
437 {
438 temp = metadata.substring(0, index);
439 metadata = metadata.substring(index + 1);
440 }
441 }
442
443 // now look for separator info
444 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP))
445 {
446 separator = temp.substring(1, temp.length() - 1);
447
448 }
449
450 String relation_id = node_id;
451 if (relation.equals("parent") || relation.equals("ancestors"))
452 {
453 relation_id = OID.getParent(node_id);
454 // parent or ancestor does not include self
455 if (relation_id.equals(node_id))
456 {
457 return "";
458 }
459 }
460 else if (relation.equals("root"))
461 {
462 relation_id = OID.getTop(node_id);
463 }
464
465 // now we either have a single node, or we have ancestors
466 DBInfo relation_info;
467 if (relation_id.equals(node_id))
468 {
469 relation_info = info;
470 }
471 else
472 {
473 relation_info = this.coll_db.getInfo(relation_id);
474 }
475 if (relation_info == null)
476 {
477 return "";
478 }
479
480 StringBuffer result = new StringBuffer();
481
482 Vector<String> values = relation_info.getMultiInfo(metadata);
483
484 if (!pos.equals("")) // if a particular position was specified, so not multiple values for the metadata
485 {
486 String meta = "";
487 if (values != null) {
488 if(pos.equals(GSConstants.META_FIRST)) {
489 meta = values.firstElement();
490 } else if(pos.equals(GSConstants.META_LAST)) {
491 meta = values.lastElement();
492 } else {
493 int position = Integer.parseInt(pos);
494 if(position < values.size()) {
495 meta = values.elementAt(position);
496 }
497 }
498 } // else ""
499
500 result.append(this.macro_resolver.resolve(meta, lang, MacroResolver.SCOPE_META, relation_id));
501 }
502 else
503 {
504 if (values != null)
505 {
506 boolean first = true;
507 for (int i = 0; i < values.size(); i++)
508 {
509 if (first)
510 {
511 first = false;
512 }
513 else
514 {
515 result.append(separator);
516 }
517 result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
518 }
519 }
520 logger.info(result);
521 }
522 // if not ancestors, then this is all we do
523 if (!relation.equals("ancestors"))
524 {
525 return result.toString();
526 }
527
528 // now do the ancestors
529 String current_id = relation_id;
530 relation_id = OID.getParent(current_id);
531 while (!relation_id.equals(current_id))
532 {
533 relation_info = this.coll_db.getInfo(relation_id);
534 if (relation_info == null)
535 return result.toString();
536
537 values = relation_info.getMultiInfo(metadata);
538 if (!pos.equals("")) // if a particular position was specified, so not multiple values for the metadata
539 {
540 String meta = "";
541 if (values != null) {
542 if(pos.equals(GSConstants.META_FIRST)) {
543 meta = values.firstElement();
544 } else if(pos.equals(GSConstants.META_LAST)) {
545 meta = values.lastElement();
546 } else {
547 int position = Integer.parseInt(pos);
548 if(position < values.size()) {
549 meta = values.elementAt(position);
550 }
551 }
552 } // else ""
553
554 result.insert(0, separator);
555 result.insert(0, this.macro_resolver.resolve(meta, lang, MacroResolver.SCOPE_META, relation_id));
556 }
557 else
558 {
559 if (values != null)
560 {
561 for (int i = values.size() - 1; i >= 0; i--)
562 {
563 result.insert(0, separator);
564 result.insert(0, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
565 }
566 }
567
568 }
569 current_id = relation_id;
570 relation_id = OID.getParent(current_id);
571 }
572 return result.toString();
573 }
574
575 /**
576 * needs to get info from collection database - if the calling code gets it
577 * already it may pay to pass it in instead
578 */
579 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
580 {
581 // resolve any collection specific macros
582 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
583 return doc_content;
584 }
585
586 protected Element getInfo(String doc_id, String info_type)
587 {
588
589 String value = "";
590 if (info_type.equals(INFO_NUM_SIBS))
591 {
592 String parent_id = OID.getParent(doc_id);
593 if (parent_id.equals(doc_id))
594 {
595 value = "0";
596 }
597 else
598 {
599 value = String.valueOf(getNumChildren(parent_id));
600 }
601 }
602 else if (info_type.equals(INFO_NUM_CHILDREN))
603 {
604 value = String.valueOf(getNumChildren(doc_id));
605 }
606 else if (info_type.equals(INFO_SIB_POS))
607 {
608 String parent_id = OID.getParent(doc_id);
609 if (parent_id.equals(doc_id))
610 {
611 value = "-1";
612 }
613 else
614 {
615 DBInfo info = this.coll_db.getInfo(parent_id);
616 if (info == null)
617 {
618 value = "-1";
619 }
620 else
621 {
622 String contains = info.getInfo("contains");
623 contains = StringUtils.replace(contains, "\"", parent_id);
624 String[] children = contains.split(";");
625 for (int i = 0; i < children.length; i++)
626 {
627 String child_id = children[i];
628 if (child_id.equals(doc_id))
629 {
630 value = String.valueOf(i + 1); // make it from 1 to length
631 break;
632 }
633 }
634 }
635 }
636 }
637 else
638 {
639 return null;
640 }
641 Element info_elem = this.doc.createElement("info");
642 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
643 info_elem.setAttribute(GSXML.VALUE_ATT, value);
644 return info_elem;
645 }
646
647 protected String getHrefOID(String href_url)
648 {
649 return this.coll_db.docnum2OID(href_url);
650 }
651
652}
Note: See TracBrowser for help on using the repository browser.