source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 25635

Last change on this file since 25635 was 25635, checked in by sjm84, 12 years ago

Fixing Greenstone 3's use (or lack thereof) of generics, this was done automatically so we may want to change it over time. This change will also auto-format any files that have not already been formatted.

  • Property svn:keywords set to Author Date Id Revision
File size: 15.6 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46// Apache Commons
47import org.apache.commons.lang3.*;
48
49/**
50 * Implements the generic retrieval and classifier services for GS2 collections.
51 *
52 * @author Katherine Don
53 * @author Michael Dewsnip
54 */
55
56public abstract class AbstractGS2DocumentRetrieve extends AbstractDocumentRetrieve
57{
58
/** Log4j logger shared by all instances, named after this class. */
static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());

// protected static final String EXTLINK_PARAM = "ext"; here or in base??
/**
 * Index stem used to locate the collection database file. Filled in by
 * configure(): taken from the indexStem element when present and non-empty,
 * otherwise the cluster name.
 */
protected String index_stem = null;

/**
 * Handle on the collection database. Created and opened for reading in
 * configure(); stays null until then.
 */
protected SimpleCollectionDatabase coll_db = null;
65
/**
 * Creates the service and installs a {@link GS2MacroResolver} as its macro
 * resolver. The resolver is handed its database later, once configure() has
 * opened it.
 */
protected AbstractGS2DocumentRetrieve()
{
    macro_resolver = new GS2MacroResolver();
}
71
72 public void cleanUp()
73 {
74 super.cleanUp();
75 this.coll_db.closeDatabase();
76 }
77
78 /** configure this service */
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info))
82 {
83 return false;
84 }
85
86 logger.info("Configuring AbstractGS2DocumentRetrieve...");
87 //this.config_info = info;
88
89 // the index stem is either specified in the config file or is the collection name
90 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
91 if (index_stem_elem != null)
92 {
93 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
94 }
95 if (this.index_stem == null || this.index_stem.equals(""))
96 {
97 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
98 this.index_stem = this.cluster_name;
99 }
100
101 // find out what kind of database we have
102 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
103 String database_type = null;
104 if (database_type_elem != null)
105 {
106 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
107 }
108 if (database_type == null || database_type.equals(""))
109 {
110 database_type = "gdbm"; // the default
111 }
112 coll_db = new SimpleCollectionDatabase(database_type);
113 if (!coll_db.databaseOK())
114 {
115 logger.error("Couldn't create the collection database of type " + database_type);
116 return false;
117 }
118
119 // Open database for querying
120 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
121 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ))
122 {
123 logger.error("Could not open collection database!");
124 return false;
125 }
126
127 // we need to set the database for our GS2 macro resolver
128 GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver) this.macro_resolver;
129 gs2_macro_resolver.setDB(this.coll_db);
130
131 return true;
132 }
133
134 /** if id ends in .fc, .pc etc, then translate it to the correct id */
135 protected String translateId(String node_id)
136 {
137 return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
138 }
139
140 /**
141 * if an id is not a greenstone id (an external id) then translate it to a
142 * greenstone one
143 */
144 protected String translateExternalId(String node_id)
145 {
146 return this.coll_db.externalId2OID(node_id);
147 }
148
149 /**
150 * returns the id of the root node of the document containing node node_id.
151 * . may be the same as node_id
152 */
153 protected String getRootId(String node_id)
154 {
155 return OID.getTop(node_id);
156 }
157
158 /** returns a list of the child ids in order, null if no children */
159 protected ArrayList<String> getChildrenIds(String node_id)
160 {
161 DBInfo info = this.coll_db.getInfo(node_id);
162 if (info == null)
163 {
164 return null;
165 }
166
167 String contains = info.getInfo("contains");
168 if (contains.equals(""))
169 {
170 return null;
171 }
172 ArrayList<String> children = new ArrayList<String>();
173 StringTokenizer st = new StringTokenizer(contains, ";");
174 while (st.hasMoreTokens())
175 {
176 String child_id = StringUtils.replace(st.nextToken(), "\"", node_id);
177 children.add(child_id);
178 }
179 return children;
180
181 }
182
183 /** returns the node id of the parent node, null if no parent */
184 protected String getParentId(String node_id)
185 {
186 String parent = OID.getParent(node_id);
187 if (parent.equals(node_id))
188 {
189 return null;
190 }
191 return parent;
192 }
193
194 /**
195 * get the metadata for the classifier node node_id returns a metadataList
196 * element: <metadataList><metadata
197 * name="xxx">value</metadata></metadataList>
198 */
199 // assumes only one value per metadata
200 protected Element getMetadataList(String node_id, boolean all_metadata, ArrayList<String> metadata_names) throws GSException
201 {
202 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
203 DBInfo info = this.coll_db.getInfo(node_id);
204 if (info == null)
205 {
206 return null;
207 }
208 String lang = "en"; // why do we need this??
209 if (all_metadata)
210 {
211 // return everything out of the database
212 Set<String> keys = info.getKeys();
213 Iterator<String> it = keys.iterator();
214 while (it.hasNext())
215 {
216 String key = it.next();
217 //String value = info.getInfo(key);
218 Vector<String> values = info.getMultiInfo(key);
219 for (int i = 0; i < values.size(); i++)
220 {
221 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
222 }
223 }
224
225 }
226 else
227 {
228 for (int i = 0; i < metadata_names.size(); i++)
229 {
230 String meta_name = metadata_names.get(i);
231 String value = getMetadata(node_id, info, meta_name, lang);
232 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
233 }
234 }
235 return metadata_list;
236 }
237
238 /**
239 * returns the structural information asked for. info_type may be one of
240 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
241 */
242 protected String getStructureInfo(String doc_id, String info_type)
243 {
244 String value = "";
245 if (info_type.equals(INFO_NUM_SIBS))
246 {
247 String parent_id = OID.getParent(doc_id);
248 if (parent_id.equals(doc_id))
249 {
250 value = "0";
251 }
252 else
253 {
254 value = String.valueOf(getNumChildren(parent_id));
255 }
256 return value;
257 }
258
259 if (info_type.equals(INFO_NUM_CHILDREN))
260 {
261 return String.valueOf(getNumChildren(doc_id));
262 }
263
264 if (info_type.equals(INFO_SIB_POS))
265 {
266 String parent_id = OID.getParent(doc_id);
267 if (parent_id.equals(doc_id))
268 {
269 return "-1";
270 }
271
272 DBInfo info = this.coll_db.getInfo(parent_id);
273 if (info == null)
274 {
275 return "-1";
276 }
277
278 String contains = info.getInfo("contains");
279 contains = StringUtils.replace(contains, "\"", parent_id);
280 String[] children = contains.split(";");
281 for (int i = 0; i < children.length; i++)
282 {
283 String child_id = children[i];
284 if (child_id.equals(doc_id))
285 {
286 return String.valueOf(i + 1); // make it from 1 to length
287
288 }
289 }
290
291 return "-1";
292 }
293 else
294 {
295 return null;
296 }
297
298 }
299
300 protected int getNumChildren(String node_id)
301 {
302 DBInfo info = this.coll_db.getInfo(node_id);
303 if (info == null)
304 {
305 return 0;
306 }
307 String contains = info.getInfo("contains");
308 if (contains.equals(""))
309 {
310 return 0;
311 }
312 String[] children = contains.split(";");
313 return children.length;
314 }
315
316 /**
317 * returns the document type of the doc that the specified node belongs to.
318 * should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED,
319 * GSXML.DOC_TYPE_HIERARCHY
320 */
321 protected String getDocType(String node_id)
322 {
323 DBInfo info = this.coll_db.getInfo(node_id);
324 if (info == null)
325 {
326 return GSXML.DOC_TYPE_SIMPLE;
327 }
328 String doc_type = info.getInfo("doctype");
329 if (!doc_type.equals("") && !doc_type.equals("doc"))
330 {
331 return doc_type;
332 }
333
334 String top_id = OID.getTop(node_id);
335 boolean is_top = (top_id.equals(node_id) ? true : false);
336
337 String children = info.getInfo("contains");
338 boolean is_leaf = (children.equals("") ? true : false);
339
340 if (is_top && is_leaf)
341 { // a single section document
342 return GSXML.DOC_TYPE_SIMPLE;
343 }
344
345 // now we just check the top node
346 if (!is_top)
347 { // we need to look at the top info
348 info = this.coll_db.getInfo(top_id);
349 }
350 if (info == null)
351 {
352 return GSXML.DOC_TYPE_HIERARCHY;
353 }
354
355 String childtype = info.getInfo("childtype");
356 if (childtype.equals("Paged"))
357 {
358 return GSXML.DOC_TYPE_PAGED;
359 }
360 return GSXML.DOC_TYPE_HIERARCHY;
361 }
362
/**
 * Returns the content of a document node. Implementations should return a
 * nodeContent element of the form
 * <nodeContent>text content or other elements</nodeContent>.
 *
 * This class leaves the method abstract; concrete subclasses supply it.
 *
 * @param doc_id
 *            id of the node whose content is wanted
 * @param lang
 *            language code for the request
 * @return the nodeContent element
 * @throws GSException
 *             declared so implementations can report retrieval failures
 */
abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
368
369 protected String getMetadata(String node_id, DBInfo info, String metadata, String lang)
370 {
371 boolean multiple = false;
372 String relation = "";
373 String separator = ", ";
374 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
375 if (pos == -1)
376 {
377 Vector<String> values = info.getMultiInfo(metadata);
378 if (values != null)
379 {
380 // just a plain meta entry eg dc.Title
381 StringBuffer result = new StringBuffer();
382 boolean first = true;
383 for (int i = 0; i < values.size(); i++)
384 {
385 if (first)
386 {
387 first = false;
388 }
389 else
390 {
391 result.append(separator);
392 }
393 result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
394 }
395 return result.toString();
396 }
397 else
398 {
399 String result = info.getInfo(metadata);
400 return this.macro_resolver.resolve(result, lang, MacroResolver.SCOPE_META, node_id);
401 }
402 }
403
404 String temp = metadata.substring(0, pos);
405 metadata = metadata.substring(pos + 1);
406 // check for all on the front
407 if (temp.equals("all"))
408 {
409 multiple = true;
410 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
411 if (pos == -1)
412 {
413 temp = "";
414 }
415 else
416 {
417 temp = metadata.substring(0, pos);
418 metadata = metadata.substring(pos + 1);
419 }
420 }
421
422 // now check for relational info
423 if (temp.equals("parent") || temp.equals("root") || temp.equals("ancestors"))
424 { // "current" "siblings" "children" "descendants"
425 relation = temp;
426 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
427 if (pos == -1)
428 {
429 temp = "";
430 }
431 else
432 {
433 temp = metadata.substring(0, pos);
434 metadata = metadata.substring(pos + 1);
435 }
436 }
437
438 // now look for separator info
439 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP))
440 {
441 separator = temp.substring(1, temp.length() - 1);
442
443 }
444
445 String relation_id = node_id;
446 if (relation.equals("parent") || relation.equals("ancestors"))
447 {
448 relation_id = OID.getParent(node_id);
449 // parent or ancestor does not include self
450 if (relation_id.equals(node_id))
451 {
452 return "";
453 }
454 }
455 else if (relation.equals("root"))
456 {
457 relation_id = OID.getTop(node_id);
458 }
459
460 // now we either have a single node, or we have ancestors
461 DBInfo relation_info;
462 if (relation_id.equals(node_id))
463 {
464 relation_info = info;
465 }
466 else
467 {
468 relation_info = this.coll_db.getInfo(relation_id);
469 }
470 if (relation_info == null)
471 {
472 return "";
473 }
474
475 StringBuffer result = new StringBuffer();
476
477 if (!multiple)
478 {
479 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
480 }
481 else
482 {
483 // we have multiple meta
484 Vector<String> values = relation_info.getMultiInfo(metadata);
485 if (values != null)
486 {
487 boolean first = true;
488 for (int i = 0; i < values.size(); i++)
489 {
490 if (first)
491 {
492 first = false;
493 }
494 else
495 {
496 result.append(separator);
497 }
498 result.append(this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
499 }
500 }
501 logger.info(result);
502 }
503 // if not ancestors, then this is all we do
504 if (!relation.equals("ancestors"))
505 {
506 return result.toString();
507 }
508
509 // now do the ancestors
510 String current_id = relation_id;
511 relation_id = OID.getParent(current_id);
512 while (!relation_id.equals(current_id))
513 {
514 relation_info = this.coll_db.getInfo(relation_id);
515 if (relation_info == null)
516 return result.toString();
517 if (!multiple)
518 {
519 result.insert(0, separator);
520 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
521 }
522 else
523 {
524 Vector<String> values = relation_info.getMultiInfo(metadata);
525 if (values != null)
526 {
527 for (int i = values.size() - 1; i >= 0; i--)
528 {
529 result.insert(0, separator);
530 result.insert(0, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
531 }
532 }
533
534 }
535 current_id = relation_id;
536 relation_id = OID.getParent(current_id);
537 }
538 return result.toString();
539 }
540
541 /**
542 * needs to get info from collection database - if the calling code gets it
543 * already it may pay to pass it in instead
544 */
545 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
546 {
547 // resolve any collection specific macros
548 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
549 return doc_content;
550 }
551
552 protected Element getInfo(String doc_id, String info_type)
553 {
554
555 String value = "";
556 if (info_type.equals(INFO_NUM_SIBS))
557 {
558 String parent_id = OID.getParent(doc_id);
559 if (parent_id.equals(doc_id))
560 {
561 value = "0";
562 }
563 else
564 {
565 value = String.valueOf(getNumChildren(parent_id));
566 }
567 }
568 else if (info_type.equals(INFO_NUM_CHILDREN))
569 {
570 value = String.valueOf(getNumChildren(doc_id));
571 }
572 else if (info_type.equals(INFO_SIB_POS))
573 {
574 String parent_id = OID.getParent(doc_id);
575 if (parent_id.equals(doc_id))
576 {
577 value = "-1";
578 }
579 else
580 {
581 DBInfo info = this.coll_db.getInfo(parent_id);
582 if (info == null)
583 {
584 value = "-1";
585 }
586 else
587 {
588 String contains = info.getInfo("contains");
589 contains = StringUtils.replace(contains, "\"", parent_id);
590 String[] children = contains.split(";");
591 for (int i = 0; i < children.length; i++)
592 {
593 String child_id = children[i];
594 if (child_id.equals(doc_id))
595 {
596 value = String.valueOf(i + 1); // make it from 1 to length
597 break;
598 }
599 }
600 }
601 }
602 }
603 else
604 {
605 return null;
606 }
607 Element info_elem = this.doc.createElement("info");
608 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
609 info_elem.setAttribute(GSXML.VALUE_ATT, value);
610 return info_elem;
611 }
612
613 protected String getHrefOID(String href_url)
614 {
615 return this.coll_db.docnum2OID(href_url);
616 }
617
618}
Note: See TracBrowser for help on using the repository browser.