source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 25427

Last change on this file since 25427 was 25427, checked in by sjm84, 12 years ago

Reformatting this file ahead of some changes

  • Property svn:keywords set to Author Date Id Revision
File size: 15.6 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46// Apache Commons
47import org.apache.commons.lang3.*;
48
49/**
50 * Implements the generic retrieval and classifier services for GS2 collections.
51 *
52 * @author Katherine Don
53 * @author Michael Dewsnip
54 */
55
56public abstract class AbstractGS2DocumentRetrieve extends AbstractDocumentRetrieve
57{
58
59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
60
61 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
62 protected String index_stem = null;
63
64 protected SimpleCollectionDatabase coll_db = null;
65
/**
 * Creates the service and installs a GS2 macro resolver. The resolver is
 * handed its collection database later, inside configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    GS2MacroResolver gs2_resolver = new GS2MacroResolver();
    this.macro_resolver = gs2_resolver;
}
71
72 public void cleanUp()
73 {
74 super.cleanUp();
75 this.coll_db.closeDatabase();
76 }
77
78 /** configure this service */
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info))
82 {
83 return false;
84 }
85
86 logger.info("Configuring AbstractGS2DocumentRetrieve...");
87 //this.config_info = info;
88
89 // the index stem is either specified in the config file or is the collection name
90 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
91 if (index_stem_elem != null)
92 {
93 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
94 }
95 if (this.index_stem == null || this.index_stem.equals(""))
96 {
97 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
98 this.index_stem = this.cluster_name;
99 }
100
101 // find out what kind of database we have
102 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
103 String database_type = null;
104 if (database_type_elem != null)
105 {
106 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
107 }
108 if (database_type == null || database_type.equals(""))
109 {
110 database_type = "gdbm"; // the default
111 }
112 coll_db = new SimpleCollectionDatabase(database_type);
113 if (!coll_db.databaseOK())
114 {
115 logger.error("Couldn't create the collection database of type " + database_type);
116 return false;
117 }
118
119 // Open database for querying
120 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
121 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ))
122 {
123 logger.error("Could not open collection database!");
124 return false;
125 }
126
127 // we need to set the database for our GS2 macro resolver
128 GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver) this.macro_resolver;
129 gs2_macro_resolver.setDB(this.coll_db);
130
131 return true;
132 }
133
134 /** if id ends in .fc, .pc etc, then translate it to the correct id */
135 protected String translateId(String node_id)
136 {
137 return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
138 }
139
140 /**
141 * if an id is not a greenstone id (an external id) then translate it to a
142 * greenstone one
143 */
144 protected String translateExternalId(String node_id)
145 {
146 return this.coll_db.externalId2OID(node_id);
147 }
148
149 /**
150 * returns the id of the root node of the document containing node node_id.
151 * . may be the same as node_id
152 */
153 protected String getRootId(String node_id)
154 {
155 return OID.getTop(node_id);
156 }
157
158 /** returns a list of the child ids in order, null if no children */
159 protected ArrayList getChildrenIds(String node_id)
160 {
161 DBInfo info = this.coll_db.getInfo(node_id);
162 if (info == null)
163 {
164 return null;
165 }
166
167 String contains = info.getInfo("contains");
168 if (contains.equals(""))
169 {
170 return null;
171 }
172 ArrayList children = new ArrayList();
173 StringTokenizer st = new StringTokenizer(contains, ";");
174 while (st.hasMoreTokens())
175 {
176 String child_id = StringUtils.replace(st.nextToken(), "\"", node_id);
177 children.add(child_id);
178 }
179 return children;
180
181 }
182
183 /** returns the node id of the parent node, null if no parent */
184 protected String getParentId(String node_id)
185 {
186 String parent = OID.getParent(node_id);
187 if (parent.equals(node_id))
188 {
189 return null;
190 }
191 return parent;
192 }
193
194 /**
195 * get the metadata for the classifier node node_id returns a metadataList
196 * element: <metadataList><metadata
197 * name="xxx">value</metadata></metadataList>
198 */
199 // assumes only one value per metadata
200 protected Element getMetadataList(String node_id, boolean all_metadata, ArrayList metadata_names) throws GSException
201 {
202 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
203 DBInfo info = this.coll_db.getInfo(node_id);
204 if (info == null)
205 {
206 return null;
207 }
208 String lang = "en"; // why do we need this??
209 if (all_metadata)
210 {
211 // return everything out of the database
212 Set keys = info.getKeys();
213 Iterator it = keys.iterator();
214 while (it.hasNext())
215 {
216 String key = (String) it.next();
217 //String value = info.getInfo(key);
218 Vector values = info.getMultiInfo(key);
219 for (int i = 0; i < values.size(); i++)
220 {
221 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve((String) values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
222 }
223 }
224
225 }
226 else
227 {
228 for (int i = 0; i < metadata_names.size(); i++)
229 {
230 String meta_name = (String) metadata_names.get(i);
231 String value = getMetadata(node_id, info, meta_name, lang);
232 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
233 }
234 }
235 return metadata_list;
236 }
237
238 /**
239 * returns the structural information asked for. info_type may be one of
240 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
241 */
242 protected String getStructureInfo(String doc_id, String info_type)
243 {
244 String value = "";
245 if (info_type.equals(INFO_NUM_SIBS))
246 {
247 String parent_id = OID.getParent(doc_id);
248 if (parent_id.equals(doc_id))
249 {
250 value = "0";
251 }
252 else
253 {
254 value = String.valueOf(getNumChildren(parent_id));
255 }
256 return value;
257 }
258
259 if (info_type.equals(INFO_NUM_CHILDREN))
260 {
261 return String.valueOf(getNumChildren(doc_id));
262 }
263
264 if (info_type.equals(INFO_SIB_POS))
265 {
266 String parent_id = OID.getParent(doc_id);
267 if (parent_id.equals(doc_id))
268 {
269 return "-1";
270 }
271
272 DBInfo info = this.coll_db.getInfo(parent_id);
273 if (info == null)
274 {
275 return "-1";
276 }
277
278 String contains = info.getInfo("contains");
279 contains = StringUtils.replace(contains, "\"", parent_id);
280 String[] children = contains.split(";");
281 for (int i = 0; i < children.length; i++)
282 {
283 String child_id = children[i];
284 if (child_id.equals(doc_id))
285 {
286 return String.valueOf(i + 1); // make it from 1 to length
287
288 }
289 }
290
291 return "-1";
292 }
293 else
294 {
295 return null;
296 }
297
298 }
299
300 protected int getNumChildren(String node_id)
301 {
302 DBInfo info = this.coll_db.getInfo(node_id);
303 if (info == null)
304 {
305 return 0;
306 }
307 String contains = info.getInfo("contains");
308 if (contains.equals(""))
309 {
310 return 0;
311 }
312 String[] children = contains.split(";");
313 return children.length;
314 }
315
316 /**
317 * returns the document type of the doc that the specified node belongs to.
318 * should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED,
319 * GSXML.DOC_TYPE_HIERARCHY
320 */
321 protected String getDocType(String node_id)
322 {
323 DBInfo info = this.coll_db.getInfo(node_id);
324 if (info == null)
325 {
326 return GSXML.DOC_TYPE_SIMPLE;
327 }
328 String doc_type = info.getInfo("doctype");
329 if (!doc_type.equals("") && !doc_type.equals("doc"))
330 {
331 return doc_type;
332 }
333
334 String top_id = OID.getTop(node_id);
335 boolean is_top = (top_id.equals(node_id) ? true : false);
336
337 String children = info.getInfo("contains");
338 boolean is_leaf = (children.equals("") ? true : false);
339
340 if (is_top && is_leaf)
341 { // a single section document
342 return GSXML.DOC_TYPE_SIMPLE;
343 }
344
345 // now we just check the top node
346 if (!is_top)
347 { // we need to look at the top info
348 info = this.coll_db.getInfo(top_id);
349 }
350 if (info == null)
351 {
352 return GSXML.DOC_TYPE_HIERARCHY;
353 }
354
355 String childtype = info.getInfo("childtype");
356 if (childtype.equals("Paged"))
357 {
358 return GSXML.DOC_TYPE_PAGED;
359 }
360 return GSXML.DOC_TYPE_HIERARCHY;
361 }
362
363 /**
364 * returns the content of a node should return a nodeContent element:
365 * <nodeContent>text content or other elements</nodeContent>
366 */
367 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
368
369 protected String getMetadata(String node_id, DBInfo info, String metadata, String lang)
370 {
371 boolean multiple = false;
372 String relation = "";
373 String separator = ", ";
374 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
375 if (pos == -1)
376 {
377 Vector values = info.getMultiInfo(metadata);
378 if (values != null)
379 {
380 // just a plain meta entry eg dc.Title
381 StringBuffer result = new StringBuffer();
382 boolean first = true;
383 for (int i = 0; i < values.size(); i++)
384 {
385 if (first)
386 {
387 first = false;
388 }
389 else
390 {
391 result.append(separator);
392 }
393 result.append(this.macro_resolver.resolve((String) values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
394 }
395 return result.toString();
396 }
397 else
398 {
399 String result = info.getInfo(metadata);
400 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
401 }
402 }
403
404 String temp = metadata.substring(0, pos);
405 metadata = metadata.substring(pos + 1);
406 // check for all on the front
407 if (temp.equals("all"))
408 {
409 multiple = true;
410 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
411 if (pos == -1)
412 {
413 temp = "";
414 }
415 else
416 {
417 temp = metadata.substring(0, pos);
418 metadata = metadata.substring(pos + 1);
419 }
420 }
421
422 // now check for relational info
423 if (temp.equals("parent") || temp.equals("root") || temp.equals("ancestors"))
424 { // "current" "siblings" "children" "descendants"
425 relation = temp;
426 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
427 if (pos == -1)
428 {
429 temp = "";
430 }
431 else
432 {
433 temp = metadata.substring(0, pos);
434 metadata = metadata.substring(pos + 1);
435 }
436 }
437
438 // now look for separator info
439 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP))
440 {
441 separator = temp.substring(1, temp.length() - 1);
442
443 }
444
445 String relation_id = node_id;
446 if (relation.equals("parent") || relation.equals("ancestors"))
447 {
448 relation_id = OID.getParent(node_id);
449 // parent or ancestor does not include self
450 if (relation_id.equals(node_id))
451 {
452 return "";
453 }
454 }
455 else if (relation.equals("root"))
456 {
457 relation_id = OID.getTop(node_id);
458 }
459
460 // now we either have a single node, or we have ancestors
461 DBInfo relation_info;
462 if (relation_id.equals(node_id))
463 {
464 relation_info = info;
465 }
466 else
467 {
468 relation_info = this.coll_db.getInfo(relation_id);
469 }
470 if (relation_info == null)
471 {
472 return "";
473 }
474
475 StringBuffer result = new StringBuffer();
476
477 if (!multiple)
478 {
479 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
480 }
481 else
482 {
483 // we have multiple meta
484 Vector values = relation_info.getMultiInfo(metadata);
485 if (values != null)
486 {
487 boolean first = true;
488 for (int i = 0; i < values.size(); i++)
489 {
490 if (first)
491 {
492 first = false;
493 }
494 else
495 {
496 result.append(separator);
497 }
498 result.append(this.macro_resolver.resolve((String) values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
499 }
500 }
501 logger.info(result);
502 }
503 // if not ancestors, then this is all we do
504 if (!relation.equals("ancestors"))
505 {
506 return result.toString();
507 }
508
509 // now do the ancestors
510 String current_id = relation_id;
511 relation_id = OID.getParent(current_id);
512 while (!relation_id.equals(current_id))
513 {
514 relation_info = this.coll_db.getInfo(relation_id);
515 if (relation_info == null)
516 return result.toString();
517 if (!multiple)
518 {
519 result.insert(0, separator);
520 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
521 }
522 else
523 {
524 Vector values = relation_info.getMultiInfo(metadata);
525 if (values != null)
526 {
527 for (int i = values.size() - 1; i >= 0; i--)
528 {
529 result.insert(0, separator);
530 result.insert(0, this.macro_resolver.resolve((String) values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
531 }
532 }
533
534 }
535 current_id = relation_id;
536 relation_id = OID.getParent(current_id);
537 }
538 return result.toString();
539 }
540
541 /**
542 * needs to get info from collection database - if the calling code gets it
543 * already it may pay to pass it in instead
544 */
545 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
546 {
547 // resolve any collection specific macros
548 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
549 return doc_content;
550 }
551
552 protected Element getInfo(String doc_id, String info_type)
553 {
554
555 String value = "";
556 if (info_type.equals(INFO_NUM_SIBS))
557 {
558 String parent_id = OID.getParent(doc_id);
559 if (parent_id.equals(doc_id))
560 {
561 value = "0";
562 }
563 else
564 {
565 value = String.valueOf(getNumChildren(parent_id));
566 }
567 }
568 else if (info_type.equals(INFO_NUM_CHILDREN))
569 {
570 value = String.valueOf(getNumChildren(doc_id));
571 }
572 else if (info_type.equals(INFO_SIB_POS))
573 {
574 String parent_id = OID.getParent(doc_id);
575 if (parent_id.equals(doc_id))
576 {
577 value = "-1";
578 }
579 else
580 {
581 DBInfo info = this.coll_db.getInfo(parent_id);
582 if (info == null)
583 {
584 value = "-1";
585 }
586 else
587 {
588 String contains = info.getInfo("contains");
589 contains = StringUtils.replace(contains, "\"", parent_id);
590 String[] children = contains.split(";");
591 for (int i = 0; i < children.length; i++)
592 {
593 String child_id = children[i];
594 if (child_id.equals(doc_id))
595 {
596 value = String.valueOf(i + 1); // make it from 1 to length
597 break;
598 }
599 }
600 }
601 }
602 }
603 else
604 {
605 return null;
606 }
607 Element info_elem = this.doc.createElement("info");
608 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
609 info_elem.setAttribute(GSXML.VALUE_ATT, value);
610 return info_elem;
611 }
612
613 protected String getHrefOID(String href_url)
614 {
615 return this.coll_db.docnum2OID(href_url);
616 }
617
618}
Note: See TracBrowser for help on using the repository browser.