source: branches/ant-install-branch/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 9529

Last change on this file since 9529 was 9529, checked in by kjdon, 19 years ago

changed the location of the GDBMWrapper and DBInfo classes, so had to change some of the import statements

  • Property svn:keywords set to Author Date Id Revision
File size: 14.3 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.util.GSXML;
23import org.greenstone.gsdl3.util.GSFile;
24import org.greenstone.gsdl3.util.OID;
25import org.greenstone.gsdl3.util.MacroResolver;
26import org.greenstone.gsdl3.util.GS2MacroResolver;
27import org.greenstone.gsdl3.util.GSConstants;
28import org.greenstone.gsdl3.util.GDBMWrapper;
29import org.greenstone.gsdl3.util.DBInfo;
30// XML classes
31import org.w3c.dom.Document;
32import org.w3c.dom.Element;
33import org.w3c.dom.NodeList;
34
35// General Java classes
36import java.io.File;
37import java.util.StringTokenizer;
38import java.util.Vector;
39import java.util.Set;
40import java.util.Iterator;
41import java.util.ArrayList;
42
43/** Implements the generic retrieval and classifier services for GS2
44 * collections.
45 *
46 * @author <a href="mailto:[email protected]">Katherine Don</a>
47 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
48 */
49
50public abstract class AbstractGS2DocumentRetrieve
51 extends AbstractDocumentRetrieve {
52
53 protected static final String INDEX_STEM_ELEM = "indexStem";
54
55 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
56 protected String index_stem = null;
57
58 protected GDBMWrapper gdbm_src = null;
59
60
/**
 * Creates the GDBM wrapper used for database lookups and installs a
 * GS2 macro resolver that is handed that same wrapper.
 */
protected AbstractGS2DocumentRetrieve()
{
    GDBMWrapper db = new GDBMWrapper();
    this.gdbm_src = db;
    this.macro_resolver = new GS2MacroResolver(db);
}
67
68
69 /** configure this service */
70 public boolean configure(Element info, Element extra_info)
71 {
72
73 System.out.println("Configuring AbstractGS2DocumentRetrieve...");
74 //this.config_info = info;
75
76 // Open GDBM database for querying
77 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
78 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
79 System.err.println("AbstractGS2DocumentRetrieve Error: Could not open GDBM database!");
80 return false;
81 }
82
83 // the index stem is either specified in the config file or is the collection name
84 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
85 if (index_stem_elem != null) {
86 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
87 }
88 if (this.index_stem == null || this.index_stem.equals("")) {
89 System.err.println("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
90 this.index_stem = this.cluster_name;
91 }
92
93
94 return super.configure(info, extra_info);
95
96 }
97
98 /** if id ends in .fc, .pc etc, then translate it to the correct id */
99 protected String translateId(String node_id) {
100 return this.gdbm_src.translateOID(node_id);
101 }
102
103 /** if an id is not a greenstone id (an external id) then translate
104 it to a greenstone one*/
105 protected String translateExternalId(String node_id){
106 return this.gdbm_src.externalId2OID(node_id);
107 }
108
109 /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
110 protected String getRootId(String node_id) {
111 return OID.getTop(node_id);
112 }
113 /** returns a list of the child ids in order, null if no children */
114 protected ArrayList getChildrenIds(String node_id) {
115 DBInfo info = this.gdbm_src.getInfo(node_id);
116 if (info == null) {
117 return null;
118 }
119
120 String contains = info.getInfo("contains");
121 if (contains.equals("")) {
122 return null;
123 }
124 ArrayList children = new ArrayList();
125 StringTokenizer st = new StringTokenizer(contains, ";");
126 while (st.hasMoreTokens()) {
127 String child_id = st.nextToken().replaceAll("\"", node_id);
128 children.add(child_id);
129 }
130 return children;
131
132 }
133 /** returns the node id of the parent node, null if no parent */
134 protected String getParentId(String node_id){
135 String parent = OID.getParent(node_id);
136 if (parent.equals(node_id)) {
137 return null;
138 }
139 return parent;
140 }
141
142 /** get the metadata for the classifier node node_id
143 * returns a metadataList element:
144 * <metadataList><metadata name="xxx">value</metadata></metadataList>
145 */
146 // assumes only one value per metadata
147 protected Element getMetadataList(String node_id, boolean all_metadata,
148 ArrayList metadata_names) {
149 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
150 DBInfo info = this.gdbm_src.getInfo(node_id);
151 if (info == null) {
152 return null;
153 }
154 String lang = "en"; // why do we need this??
155 if (all_metadata) {
156 // return everything out of the database
157 Set keys = info.getKeys();
158 Iterator it = keys.iterator();
159 while(it.hasNext()) {
160 String key = (String)it.next();
161 String value = info.getInfo(key);
162 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(value, lang, MacroResolver.SCOPE_META, node_id));
163 }
164
165 } else {
166 for (int i=0; i<metadata_names.size(); i++) {
167 String meta_name = (String) metadata_names.get(i);
168 String value = getMetadata(node_id, info, meta_name, lang);
169 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
170 }
171 }
172 return metadata_list;
173 }
174
175 /** returns the structural information asked for.
176 * info_type may be one of
177 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
178 */
179 protected String getStructureInfo(String doc_id, String info_type) {
180 String value="";
181 if (info_type.equals(INFO_NUM_SIBS)) {
182 String parent_id = OID.getParent(doc_id);
183 if (parent_id.equals(doc_id)) {
184 value="0";
185 } else {
186 value = String.valueOf(getNumChildren(parent_id));
187 }
188 return value;
189 }
190
191 if (info_type.equals(INFO_NUM_CHILDREN)) {
192 return String.valueOf(getNumChildren(doc_id));
193 }
194
195
196 if (info_type.equals(INFO_SIB_POS)) {
197 String parent_id = OID.getParent(doc_id);
198 if (parent_id.equals(doc_id)) {
199 return "-1";
200 }
201
202 DBInfo info = this.gdbm_src.getInfo(parent_id);
203 if (info==null) {
204 return "-1";
205 }
206
207 String contains = info.getInfo("contains");
208 contains = contains.replaceAll("\"", parent_id);
209 String [] children = contains.split(";");
210 for (int i=0;i<children.length;i++) {
211 String child_id = children[i];
212 if (child_id.equals(doc_id)) {
213 return String.valueOf(i+1); // make it from 1 to length
214
215 }
216 }
217
218 return "-1";
219 } else {
220 return null;
221 }
222
223 }
224
225 protected int getNumChildren(String node_id) {
226 DBInfo info = this.gdbm_src.getInfo(node_id);
227 if (info == null) {
228 return 0;
229 }
230 String contains = info.getInfo("contains");
231 if (contains.equals("")) {
232 return 0;
233 }
234 String [] children = contains.split(";");
235 return children.length;
236 }
237
238 /** returns the document type of the doc that the specified node
239 belongs to. should be one of
240 GSXML.DOC_TYPE_SIMPLE,
241 GSXML.DOC_TYPE_PAGED,
242 GSXML.DOC_TYPE_HIERARCHY
243 */
244 protected String getDocType(String node_id) {
245 DBInfo info = this.gdbm_src.getInfo(node_id);
246 if (info == null) {
247 return GSXML.DOC_TYPE_SIMPLE;
248 }
249 String doc_type = info.getInfo("doctype");
250 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
251 return doc_type;
252 }
253
254 String top_id = OID.getTop(node_id);
255 boolean is_top = (top_id.equals(node_id) ? true : false);
256
257 String children = info.getInfo("contains");
258 boolean is_leaf = (children.equals("") ? true : false);
259
260 if (is_top && is_leaf) { // a single section document
261 return GSXML.DOC_TYPE_SIMPLE;
262 }
263
264 // now we just check the top node
265 if (!is_top) { // we need to look at the top info
266 info = this.gdbm_src.getInfo(top_id);
267 }
268 if (info == null) {
269 return GSXML.DOC_TYPE_HIERARCHY;
270 }
271
272 String childtype = info.getInfo("childtype");
273 if (childtype.equals("Paged")) {
274 return GSXML.DOC_TYPE_PAGED;
275 }
276 return GSXML.DOC_TYPE_HIERARCHY;
277 }
278
279 /** returns the content of a node
280 * should return a nodeContent element:
281 * <nodeContent>text content or other elements</nodeContent>
282 */
283 abstract protected Element getNodeContent(String doc_id);
284
285 protected String getMetadata(String node_id, DBInfo info,
286 String metadata, String lang) {
287 boolean multiple = false;
288 String relation = "";
289 String separator = ", ";
290 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
291 if (pos ==-1) {
292 // just a plain meta entry eg dc.Title
293 return macro_resolver.resolve((String)info.getInfo(metadata), lang, MacroResolver.SCOPE_META, node_id);
294 }
295
296 String temp = metadata.substring(0, pos);
297 metadata = metadata.substring(pos+1);
298 // check for all on the front
299 if (temp.equals("all")) {
300 multiple=true;
301 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
302 if (pos ==-1) {
303 temp = "";
304 } else {
305 temp = metadata.substring(0, pos);
306 metadata = metadata.substring(pos+1);
307 }
308 }
309
310 // now check for relational info
311 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
312 relation = temp;
313 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
314 if (pos == -1) {
315 temp = "";
316 } else {
317 temp = metadata.substring(0, pos);
318 metadata = metadata.substring(pos+1);
319 }
320 }
321
322 // now look for separator info
323 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
324 separator = temp.substring(1, temp.length()-1);
325
326 }
327
328 String relation_id = node_id;
329 if (relation.equals("parent") || relation.equals("ancestors")) {
330 relation_id = OID.getParent(node_id);
331 // parent or ancestor does not include self
332 if (relation_id.equals(node_id)){
333 return "";
334 }
335 } else if (relation.equals("root")) {
336 relation_id = OID.getTop(node_id);
337 }
338
339 // now we either have a single node, or we have ancestors
340 DBInfo relation_info;
341 if (relation_id.equals(node_id)) {
342 relation_info = info;
343 } else {
344 relation_info = this.gdbm_src.getInfo(relation_id);
345 }
346 if (relation_info == null) {
347 return "";
348 }
349
350 StringBuffer result = new StringBuffer();
351
352 if (!multiple) {
353 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
354 } else {
355 // we have multiple meta
356 Vector values = relation_info.getMultiInfo(metadata);
357 if (values != null) {
358 boolean first = true;
359 for (int i=0; i<values.size(); i++) {
360 if (first) {
361 first = false;
362 } else {
363 result.append(separator);
364 }
365 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
366 }
367 }
368 }
369 // if not ancestors, then this is all we do
370 if (!relation.equals("ancestors")) {
371 return result.toString();
372 }
373
374 // now do the ancestors
375 String current_id = relation_id;
376 relation_id = OID.getParent(current_id);
377 while (!relation_id.equals(current_id)) {
378 relation_info = this.gdbm_src.getInfo(relation_id);
379 if (relation_info == null) return result.toString();
380 if (!multiple) {
381 result.insert(0, separator);
382 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
383 } else {
384 Vector values = relation_info.getMultiInfo(metadata);
385 if (values != null) {
386 for (int i=values.size()-1; i>=0; i--) {
387 result.insert(0, separator);
388 result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
389 }
390 }
391
392 }
393 current_id = relation_id;
394 relation_id = OID.getParent(current_id);
395 }
396 return result.toString();
397 }
398
399
400 /** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
401 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
402 {
403 DBInfo info = null;
404 if (doc_content.indexOf("_httpdocimg_")!=-1) {
405 String top_doc_id = OID.getTop(doc_id);
406 info = this.gdbm_src.getInfo(top_doc_id);
407 if (info == null) {
408 // perhaps we had per.iods in the ids - just try the current id
409 top_doc_id = doc_id;
410 info = this.gdbm_src.getInfo(top_doc_id);
411 }
412 if (info != null) {
413 String archivedir = info.getInfo("archivedir");
414 String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
415
416 // Resolve all "_httpdocimg_"s
417 doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
418 }
419 }
420 // resolve any collection specific macros
421 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
422 return doc_content;
423 }
424
425 protected Element getInfo(String doc_id, String info_type) {
426
427 String value="";
428 if (info_type.equals(INFO_NUM_SIBS)) {
429 String parent_id = OID.getParent(doc_id);
430 if (parent_id.equals(doc_id)) {
431 value="0";
432 } else {
433 value = String.valueOf(getNumChildren(parent_id));
434 }
435 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
436 value = String.valueOf(getNumChildren(doc_id));
437 } else if (info_type.equals(INFO_SIB_POS)) {
438 String parent_id = OID.getParent(doc_id);
439 if (parent_id.equals(doc_id)) {
440 value="-1";
441 } else {
442 DBInfo info = this.gdbm_src.getInfo(parent_id);
443 if (info==null) {
444 value ="-1";
445 } else {
446 String contains = info.getInfo("contains");
447 contains = contains.replaceAll("\"", parent_id);
448 String [] children = contains.split(";");
449 for (int i=0;i<children.length;i++) {
450 String child_id = children[i];
451 if (child_id.equals(doc_id)) {
452 value = String.valueOf(i+1); // make it from 1 to length
453 break;
454 }
455 }
456 }
457 }
458 } else {
459 return null;
460 }
461 Element info_elem = this.doc.createElement("info");
462 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
463 info_elem.setAttribute(GSXML.VALUE_ATT, value);
464 return info_elem;
465 }
466
467}
Note: See TracBrowser for help on using the repository browser.