source: branches/ant-install-branch/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 9815

Last change on this file since 9815 was 9815, checked in by kjdon, 19 years ago

Some methods from the DocumentRetrieve classes now throw GSExceptions. I am trying to make sure that no Exceptions reach the user interface. Many more error elements are returned too, in the hope that they may be useful to other people.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.4 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.GDBMWrapper;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44/** Implements the generic retrieval and classifier services for GS2
45 * collections.
46 *
47 * @author <a href="mailto:[email protected]">Katherine Don</a>
48 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
49 */
50
51public abstract class AbstractGS2DocumentRetrieve
52 extends AbstractDocumentRetrieve {
53
54 protected static final String INDEX_STEM_ELEM = "indexStem";
55
56 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
57 protected String index_stem = null;
58
59 protected GDBMWrapper gdbm_src = null;
60
61
62 /** constructor */
63 protected AbstractGS2DocumentRetrieve()
64 {
65 this.gdbm_src = new GDBMWrapper();
66 this.macro_resolver = new GS2MacroResolver(gdbm_src);
67 }
68
69
70 /** configure this service */
71 public boolean configure(Element info, Element extra_info)
72 {
73
74 System.out.println("Configuring AbstractGS2DocumentRetrieve...");
75 //this.config_info = info;
76
77 // Open GDBM database for querying
78 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
79 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
80 System.err.println("AbstractGS2DocumentRetrieve Error: Could not open GDBM database!");
81 return false;
82 }
83
84 // the index stem is either specified in the config file or is the collection name
85 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
86 if (index_stem_elem != null) {
87 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
88 }
89 if (this.index_stem == null || this.index_stem.equals("")) {
90 System.err.println("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
91 this.index_stem = this.cluster_name;
92 }
93
94
95 return super.configure(info, extra_info);
96
97 }
98
99 /** if id ends in .fc, .pc etc, then translate it to the correct id */
100 protected String translateId(String node_id) {
101 return this.gdbm_src.translateOID(node_id);
102 }
103
104 /** if an id is not a greenstone id (an external id) then translate
105 it to a greenstone one*/
106 protected String translateExternalId(String node_id){
107 return this.gdbm_src.externalId2OID(node_id);
108 }
109
110 /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
111 protected String getRootId(String node_id) {
112 return OID.getTop(node_id);
113 }
114 /** returns a list of the child ids in order, null if no children */
115 protected ArrayList getChildrenIds(String node_id) {
116 DBInfo info = this.gdbm_src.getInfo(node_id);
117 if (info == null) {
118 return null;
119 }
120
121 String contains = info.getInfo("contains");
122 if (contains.equals("")) {
123 return null;
124 }
125 ArrayList children = new ArrayList();
126 StringTokenizer st = new StringTokenizer(contains, ";");
127 while (st.hasMoreTokens()) {
128 String child_id = st.nextToken().replaceAll("\"", node_id);
129 children.add(child_id);
130 }
131 return children;
132
133 }
134 /** returns the node id of the parent node, null if no parent */
135 protected String getParentId(String node_id){
136 String parent = OID.getParent(node_id);
137 if (parent.equals(node_id)) {
138 return null;
139 }
140 return parent;
141 }
142
143 /** get the metadata for the classifier node node_id
144 * returns a metadataList element:
145 * <metadataList><metadata name="xxx">value</metadata></metadataList>
146 */
147 // assumes only one value per metadata
148 protected Element getMetadataList(String node_id, boolean all_metadata,
149 ArrayList metadata_names)
150 throws GSException {
151 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
152 DBInfo info = this.gdbm_src.getInfo(node_id);
153 if (info == null) {
154 return null;
155 }
156 String lang = "en"; // why do we need this??
157 if (all_metadata) {
158 // return everything out of the database
159 Set keys = info.getKeys();
160 Iterator it = keys.iterator();
161 while(it.hasNext()) {
162 String key = (String)it.next();
163 String value = info.getInfo(key);
164 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(value, lang, MacroResolver.SCOPE_META, node_id));
165 }
166
167 } else {
168 for (int i=0; i<metadata_names.size(); i++) {
169 String meta_name = (String) metadata_names.get(i);
170 String value = getMetadata(node_id, info, meta_name, lang);
171 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
172 }
173 }
174 return metadata_list;
175 }
176
177 /** returns the structural information asked for.
178 * info_type may be one of
179 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
180 */
181 protected String getStructureInfo(String doc_id, String info_type) {
182 String value="";
183 if (info_type.equals(INFO_NUM_SIBS)) {
184 String parent_id = OID.getParent(doc_id);
185 if (parent_id.equals(doc_id)) {
186 value="0";
187 } else {
188 value = String.valueOf(getNumChildren(parent_id));
189 }
190 return value;
191 }
192
193 if (info_type.equals(INFO_NUM_CHILDREN)) {
194 return String.valueOf(getNumChildren(doc_id));
195 }
196
197
198 if (info_type.equals(INFO_SIB_POS)) {
199 String parent_id = OID.getParent(doc_id);
200 if (parent_id.equals(doc_id)) {
201 return "-1";
202 }
203
204 DBInfo info = this.gdbm_src.getInfo(parent_id);
205 if (info==null) {
206 return "-1";
207 }
208
209 String contains = info.getInfo("contains");
210 contains = contains.replaceAll("\"", parent_id);
211 String [] children = contains.split(";");
212 for (int i=0;i<children.length;i++) {
213 String child_id = children[i];
214 if (child_id.equals(doc_id)) {
215 return String.valueOf(i+1); // make it from 1 to length
216
217 }
218 }
219
220 return "-1";
221 } else {
222 return null;
223 }
224
225 }
226
227 protected int getNumChildren(String node_id) {
228 DBInfo info = this.gdbm_src.getInfo(node_id);
229 if (info == null) {
230 return 0;
231 }
232 String contains = info.getInfo("contains");
233 if (contains.equals("")) {
234 return 0;
235 }
236 String [] children = contains.split(";");
237 return children.length;
238 }
239
240 /** returns the document type of the doc that the specified node
241 belongs to. should be one of
242 GSXML.DOC_TYPE_SIMPLE,
243 GSXML.DOC_TYPE_PAGED,
244 GSXML.DOC_TYPE_HIERARCHY
245 */
246 protected String getDocType(String node_id) {
247 DBInfo info = this.gdbm_src.getInfo(node_id);
248 if (info == null) {
249 return GSXML.DOC_TYPE_SIMPLE;
250 }
251 String doc_type = info.getInfo("doctype");
252 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
253 return doc_type;
254 }
255
256 String top_id = OID.getTop(node_id);
257 boolean is_top = (top_id.equals(node_id) ? true : false);
258
259 String children = info.getInfo("contains");
260 boolean is_leaf = (children.equals("") ? true : false);
261
262 if (is_top && is_leaf) { // a single section document
263 return GSXML.DOC_TYPE_SIMPLE;
264 }
265
266 // now we just check the top node
267 if (!is_top) { // we need to look at the top info
268 info = this.gdbm_src.getInfo(top_id);
269 }
270 if (info == null) {
271 return GSXML.DOC_TYPE_HIERARCHY;
272 }
273
274 String childtype = info.getInfo("childtype");
275 if (childtype.equals("Paged")) {
276 return GSXML.DOC_TYPE_PAGED;
277 }
278 return GSXML.DOC_TYPE_HIERARCHY;
279 }
280
281 /** returns the content of a node
282 * should return a nodeContent element:
283 * <nodeContent>text content or other elements</nodeContent>
284 */
285 abstract protected Element getNodeContent(String doc_id) throws GSException;
286
287 protected String getMetadata(String node_id, DBInfo info,
288 String metadata, String lang) {
289 boolean multiple = false;
290 String relation = "";
291 String separator = ", ";
292 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
293 if (pos ==-1) {
294 // just a plain meta entry eg dc.Title
295 return macro_resolver.resolve((String)info.getInfo(metadata), lang, MacroResolver.SCOPE_META, node_id);
296 }
297
298 String temp = metadata.substring(0, pos);
299 metadata = metadata.substring(pos+1);
300 // check for all on the front
301 if (temp.equals("all")) {
302 multiple=true;
303 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
304 if (pos ==-1) {
305 temp = "";
306 } else {
307 temp = metadata.substring(0, pos);
308 metadata = metadata.substring(pos+1);
309 }
310 }
311
312 // now check for relational info
313 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
314 relation = temp;
315 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
316 if (pos == -1) {
317 temp = "";
318 } else {
319 temp = metadata.substring(0, pos);
320 metadata = metadata.substring(pos+1);
321 }
322 }
323
324 // now look for separator info
325 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
326 separator = temp.substring(1, temp.length()-1);
327
328 }
329
330 String relation_id = node_id;
331 if (relation.equals("parent") || relation.equals("ancestors")) {
332 relation_id = OID.getParent(node_id);
333 // parent or ancestor does not include self
334 if (relation_id.equals(node_id)){
335 return "";
336 }
337 } else if (relation.equals("root")) {
338 relation_id = OID.getTop(node_id);
339 }
340
341 // now we either have a single node, or we have ancestors
342 DBInfo relation_info;
343 if (relation_id.equals(node_id)) {
344 relation_info = info;
345 } else {
346 relation_info = this.gdbm_src.getInfo(relation_id);
347 }
348 if (relation_info == null) {
349 return "";
350 }
351
352 StringBuffer result = new StringBuffer();
353
354 if (!multiple) {
355 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
356 } else {
357 // we have multiple meta
358 Vector values = relation_info.getMultiInfo(metadata);
359 if (values != null) {
360 boolean first = true;
361 for (int i=0; i<values.size(); i++) {
362 if (first) {
363 first = false;
364 } else {
365 result.append(separator);
366 }
367 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
368 }
369 }
370 }
371 // if not ancestors, then this is all we do
372 if (!relation.equals("ancestors")) {
373 return result.toString();
374 }
375
376 // now do the ancestors
377 String current_id = relation_id;
378 relation_id = OID.getParent(current_id);
379 while (!relation_id.equals(current_id)) {
380 relation_info = this.gdbm_src.getInfo(relation_id);
381 if (relation_info == null) return result.toString();
382 if (!multiple) {
383 result.insert(0, separator);
384 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
385 } else {
386 Vector values = relation_info.getMultiInfo(metadata);
387 if (values != null) {
388 for (int i=values.size()-1; i>=0; i--) {
389 result.insert(0, separator);
390 result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
391 }
392 }
393
394 }
395 current_id = relation_id;
396 relation_id = OID.getParent(current_id);
397 }
398 return result.toString();
399 }
400
401
402 /** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
403 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
404 {
405 DBInfo info = null;
406 if (doc_content.indexOf("_httpdocimg_")!=-1) {
407 String top_doc_id = OID.getTop(doc_id);
408 info = this.gdbm_src.getInfo(top_doc_id);
409 if (info == null) {
410 // perhaps we had per.iods in the ids - just try the current id
411 top_doc_id = doc_id;
412 info = this.gdbm_src.getInfo(top_doc_id);
413 }
414 if (info != null) {
415 String archivedir = info.getInfo("archivedir");
416 String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
417
418 // Resolve all "_httpdocimg_"s
419 doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
420 }
421 }
422 // resolve any collection specific macros
423 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
424 return doc_content;
425 }
426
427 protected Element getInfo(String doc_id, String info_type) {
428
429 String value="";
430 if (info_type.equals(INFO_NUM_SIBS)) {
431 String parent_id = OID.getParent(doc_id);
432 if (parent_id.equals(doc_id)) {
433 value="0";
434 } else {
435 value = String.valueOf(getNumChildren(parent_id));
436 }
437 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
438 value = String.valueOf(getNumChildren(doc_id));
439 } else if (info_type.equals(INFO_SIB_POS)) {
440 String parent_id = OID.getParent(doc_id);
441 if (parent_id.equals(doc_id)) {
442 value="-1";
443 } else {
444 DBInfo info = this.gdbm_src.getInfo(parent_id);
445 if (info==null) {
446 value ="-1";
447 } else {
448 String contains = info.getInfo("contains");
449 contains = contains.replaceAll("\"", parent_id);
450 String [] children = contains.split(";");
451 for (int i=0;i<children.length;i++) {
452 String child_id = children[i];
453 if (child_id.equals(doc_id)) {
454 value = String.valueOf(i+1); // make it from 1 to length
455 break;
456 }
457 }
458 }
459 }
460 } else {
461 return null;
462 }
463 Element info_elem = this.doc.createElement("info");
464 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
465 info_elem.setAttribute(GSXML.VALUE_ATT, value);
466 return info_elem;
467 }
468
469}
Note: See TracBrowser for help on using the repository browser.