source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 9000

Last change on this file since 9000 was 9000, checked in by kjdon, 19 years ago

added indexStem info into configure

  • Property svn:keywords set to Author Date Id Revision
File size: 14.2 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gdbm.*;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29
30// XML classes
31import org.w3c.dom.Document;
32import org.w3c.dom.Element;
33import org.w3c.dom.NodeList;
34
35// General Java classes
36import java.io.File;
37import java.util.StringTokenizer;
38import java.util.Vector;
39import java.util.Set;
40import java.util.Iterator;
41import java.util.ArrayList;
42
43/** Implements the generic retrieval and classifier services for GS2
44 * collections.
45 *
46 * @author <a href="mailto:[email protected]">Katherine Don</a>
47 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
48 */
49
50public abstract class AbstractGS2DocumentRetrieve
51 extends AbstractDocumentRetrieve {
52
53 protected static final String INDEX_STEM_ELEM = "indexStem";
54
55 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
56 protected String index_stem = null;
57
58 protected GDBMWrapper gdbm_src = null;
59
60
/**
 * Creates the GDBM wrapper and a GS2 macro resolver that is handed the
 * same wrapper. The database itself is not opened here; see configure().
 */
protected AbstractGS2DocumentRetrieve() {
    GDBMWrapper wrapper = new GDBMWrapper();
    this.gdbm_src = wrapper;
    this.macro_resolver = new GS2MacroResolver(wrapper);
}
67
68
69 /** configure this service */
70 public boolean configure(Element info, Element extra_info)
71 {
72
73 System.out.println("Configuring AbstractGS2DocumentRetrieve...");
74 //this.config_info = info;
75
76 // Open GDBM database for querying
77 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
78 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
79 System.err.println("AbstractGS2DocumentRetrieve Error: Could not open GDBM database!");
80 return false;
81 }
82
83 // the index stem is either specified in the config file or is the collection name
84 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
85 if (index_stem_elem != null) {
86 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
87 }
88 if (this.index_stem == null || this.index_stem.equals("")) {
89 System.err.println("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
90 this.index_stem = this.cluster_name;
91 }
92
93
94 return super.configure(info, extra_info);
95
96 }
97
98 /** if id ends in .fc, .pc etc, then translate it to the correct id */
99 protected String translateId(String node_id) {
100 return this.gdbm_src.translateOID(node_id);
101 }
102
103 /** if an id is not a greenstone id (an external id) then translate
104 it to a greenstone one*/
105 protected String translateExternalId(String node_id){
106 return this.gdbm_src.externalId2OID(node_id);
107 }
108
109 /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
110 protected String getRootId(String node_id) {
111 return OID.getTop(node_id);
112 }
113 /** returns a list of the child ids in order, null if no children */
114 protected ArrayList getChildrenIds(String node_id) {
115 DBInfo info = this.gdbm_src.getInfo(node_id);
116 if (info == null) {
117 return null;
118 }
119
120 String contains = info.getInfo("contains");
121 if (contains.equals("")) {
122 return null;
123 }
124 ArrayList children = new ArrayList();
125 StringTokenizer st = new StringTokenizer(contains, ";");
126 while (st.hasMoreTokens()) {
127 String child_id = st.nextToken().replaceAll("\"", node_id);
128 children.add(child_id);
129 }
130 return children;
131
132 }
133 /** returns the node id of the parent node, null if no parent */
134 protected String getParentId(String node_id){
135 String parent = OID.getParent(node_id);
136 if (parent.equals(node_id)) {
137 return null;
138 }
139 return parent;
140 }
141
142 /** get the metadata for the classifier node node_id
143 * returns a metadataList element:
144 * <metadataList><metadata name="xxx">value</metadata></metadataList>
145 */
146 // assumes only one value per metadata
147 protected Element getMetadataList(String node_id, boolean all_metadata,
148 ArrayList metadata_names) {
149 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
150 DBInfo info = this.gdbm_src.getInfo(node_id);
151 if (info == null) {
152 return null;
153 }
154 String lang = "en"; // why do we need this??
155 if (all_metadata) {
156 // return everything out of the database
157 Set keys = info.getKeys();
158 Iterator it = keys.iterator();
159 while(it.hasNext()) {
160 String key = (String)it.next();
161 String value = info.getInfo(key);
162 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(value, lang, MacroResolver.SCOPE_META, node_id));
163 }
164
165 } else {
166 for (int i=0; i<metadata_names.size(); i++) {
167 String meta_name = (String) metadata_names.get(i);
168 String value = getMetadata(node_id, info, meta_name, lang);
169 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
170 }
171 }
172 return metadata_list;
173 }
174
175 /** returns the structural information asked for.
176 * info_type may be one of
177 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
178 */
179 protected String getStructureInfo(String doc_id, String info_type) {
180 String value="";
181 if (info_type.equals(INFO_NUM_SIBS)) {
182 String parent_id = OID.getParent(doc_id);
183 if (parent_id.equals(doc_id)) {
184 value="0";
185 } else {
186 value = String.valueOf(getNumChildren(parent_id));
187 }
188 return value;
189 }
190
191 if (info_type.equals(INFO_NUM_CHILDREN)) {
192 return String.valueOf(getNumChildren(doc_id));
193 }
194
195
196 if (info_type.equals(INFO_SIB_POS)) {
197 String parent_id = OID.getParent(doc_id);
198 if (parent_id.equals(doc_id)) {
199 return "-1";
200 }
201
202 DBInfo info = this.gdbm_src.getInfo(parent_id);
203 if (info==null) {
204 return "-1";
205 }
206
207 String contains = info.getInfo("contains");
208 contains = contains.replaceAll("\"", parent_id);
209 String [] children = contains.split(";");
210 for (int i=0;i<children.length;i++) {
211 String child_id = children[i];
212 if (child_id.equals(doc_id)) {
213 return String.valueOf(i+1); // make it from 1 to length
214
215 }
216 }
217
218 return "-1";
219 } else {
220 return null;
221 }
222
223 }
224
225 protected int getNumChildren(String node_id) {
226 DBInfo info = this.gdbm_src.getInfo(node_id);
227 if (info == null) {
228 return 0;
229 }
230 String contains = info.getInfo("contains");
231 if (contains.equals("")) {
232 return 0;
233 }
234 String [] children = contains.split(";");
235 return children.length;
236 }
237
238 /** returns the document type of the doc that the specified node
239 belongs to. should be one of
240 GSXML.DOC_TYPE_SIMPLE,
241 GSXML.DOC_TYPE_PAGED,
242 GSXML.DOC_TYPE_HIERARCHY
243 */
244 protected String getDocType(String node_id) {
245 DBInfo info = this.gdbm_src.getInfo(node_id);
246 if (info == null) {
247 return GSXML.DOC_TYPE_SIMPLE;
248 }
249 String doc_type = info.getInfo("doctype");
250 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
251 return doc_type;
252 }
253
254 String top_id = OID.getTop(node_id);
255 boolean is_top = (top_id.equals(node_id) ? true : false);
256
257 String children = info.getInfo("contains");
258 boolean is_leaf = (children.equals("") ? true : false);
259
260 if (is_top && is_leaf) { // a single section document
261 return GSXML.DOC_TYPE_SIMPLE;
262 }
263
264 // now we just check the top node
265 if (!is_top) { // we need to look at the top info
266 info = this.gdbm_src.getInfo(top_id);
267 }
268 if (info == null) {
269 return GSXML.DOC_TYPE_HIERARCHY;
270 }
271
272 String childtype = info.getInfo("childtype");
273 if (childtype.equals("Paged")) {
274 return GSXML.DOC_TYPE_PAGED;
275 }
276 return GSXML.DOC_TYPE_HIERARCHY;
277 }
278
279 /** returns the content of a node
280 * should return a nodeContent element:
281 * <nodeContent>text content or other elements</nodeContent>
282 */
283 abstract protected Element getNodeContent(String doc_id);
284
285 protected String getMetadata(String node_id, DBInfo info,
286 String metadata, String lang) {
287 boolean multiple = false;
288 String relation = "";
289 String separator = ", ";
290 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
291 if (pos ==-1) {
292 // just a plain meta entry eg dc.Title
293 return macro_resolver.resolve((String)info.getInfo(metadata), lang, MacroResolver.SCOPE_META, node_id);
294 }
295
296 String temp = metadata.substring(0, pos);
297 metadata = metadata.substring(pos+1);
298 // check for all on the front
299 if (temp.equals("all")) {
300 multiple=true;
301 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
302 if (pos ==-1) {
303 temp = "";
304 } else {
305 temp = metadata.substring(0, pos);
306 metadata = metadata.substring(pos+1);
307 }
308 }
309
310 // now check for relational info
311 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
312 relation = temp;
313 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
314 if (pos == -1) {
315 temp = "";
316 } else {
317 temp = metadata.substring(0, pos);
318 metadata = metadata.substring(pos+1);
319 }
320 }
321
322 // now look for separator info
323 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
324 separator = temp.substring(1, temp.length()-1);
325
326 }
327
328 String relation_id = node_id;
329 if (relation.equals("parent") || relation.equals("ancestors")) {
330 relation_id = OID.getParent(node_id);
331 // parent or ancestor does not include self
332 if (relation_id.equals(node_id)){
333 return "";
334 }
335 } else if (relation.equals("root")) {
336 relation_id = OID.getTop(node_id);
337 }
338
339 // now we either have a single node, or we have ancestors
340 DBInfo relation_info;
341 if (relation_id.equals(node_id)) {
342 relation_info = info;
343 } else {
344 relation_info = this.gdbm_src.getInfo(relation_id);
345 }
346 if (relation_info == null) {
347 return "";
348 }
349
350 StringBuffer result = new StringBuffer();
351
352 if (!multiple) {
353 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
354 } else {
355 // we have multiple meta
356 Vector values = relation_info.getMultiInfo(metadata);
357 if (values != null) {
358 boolean first = true;
359 for (int i=0; i<values.size(); i++) {
360 if (first) {
361 first = false;
362 } else {
363 result.append(separator);
364 }
365 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
366 }
367 }
368 }
369 // if not ancestors, then this is all we do
370 if (!relation.equals("ancestors")) {
371 return result.toString();
372 }
373
374 // now do the ancestors
375 String current_id = relation_id;
376 relation_id = OID.getParent(current_id);
377 while (!relation_id.equals(current_id)) {
378 relation_info = this.gdbm_src.getInfo(relation_id);
379 if (relation_info == null) return result.toString();
380 if (!multiple) {
381 result.insert(0, separator);
382 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
383 } else {
384 Vector values = relation_info.getMultiInfo(metadata);
385 if (values != null) {
386 for (int i=values.size()-1; i>=0; i--) {
387 result.insert(0, separator);
388 result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
389 }
390 }
391
392 }
393 current_id = relation_id;
394 relation_id = OID.getParent(current_id);
395 }
396 return result.toString();
397 }
398
399
400 /** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
401 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
402 {
403 DBInfo info = null;
404 if (doc_content.indexOf("_httpdocimg_")!=-1) {
405 String top_doc_id = OID.getTop(doc_id);
406 info = this.gdbm_src.getInfo(top_doc_id);
407 if (info == null) {
408 // perhaps we had per.iods in the ids - just try the current id
409 top_doc_id = doc_id;
410 info = this.gdbm_src.getInfo(top_doc_id);
411 }
412 if (info != null) {
413 String archivedir = info.getInfo("archivedir");
414 String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
415
416 // Resolve all "_httpdocimg_"s
417 doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
418 }
419 }
420 // resolve any collection specific macros
421 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
422 return doc_content;
423 }
424
425 protected Element getInfo(String doc_id, String info_type) {
426
427 String value="";
428 if (info_type.equals(INFO_NUM_SIBS)) {
429 String parent_id = OID.getParent(doc_id);
430 if (parent_id.equals(doc_id)) {
431 value="0";
432 } else {
433 value = String.valueOf(getNumChildren(parent_id));
434 }
435 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
436 value = String.valueOf(getNumChildren(doc_id));
437 } else if (info_type.equals(INFO_SIB_POS)) {
438 String parent_id = OID.getParent(doc_id);
439 if (parent_id.equals(doc_id)) {
440 value="-1";
441 } else {
442 DBInfo info = this.gdbm_src.getInfo(parent_id);
443 if (info==null) {
444 value ="-1";
445 } else {
446 String contains = info.getInfo("contains");
447 contains = contains.replaceAll("\"", parent_id);
448 String [] children = contains.split(";");
449 for (int i=0;i<children.length;i++) {
450 String child_id = children[i];
451 if (child_id.equals(doc_id)) {
452 value = String.valueOf(i+1); // make it from 1 to length
453 break;
454 }
455 }
456 }
457 }
458 } else {
459 return null;
460 }
461 Element info_elem = this.doc.createElement("info");
462 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
463 info_elem.setAttribute(GSXML.VALUE_ATT, value);
464 return info_elem;
465 }
466
467}
Note: See TracBrowser for help on using the repository browser.