source: branches/ant-install-branch/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 9824

Last change on this file since 9824 was 9824, checked in by kjdon, 19 years ago

When a collection (using GDBM) is opened by Tomcat, Windows holds a lock on the GDBM file, so you can't rebuild it. Modified ModuleInterface to have a cleanUp method, so all modules need to implement this. The mg/mgpp and gdbm modules now unload the index data or close the connection to the database, so cleanUp should be called whenever you deactivate a module.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.5 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.MacroResolver;
27import org.greenstone.gsdl3.util.GS2MacroResolver;
28import org.greenstone.gsdl3.util.GSConstants;
29import org.greenstone.gsdl3.util.GDBMWrapper;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44/** Implements the generic retrieval and classifier services for GS2
45 * collections.
46 *
47 * @author <a href="mailto:[email protected]">Katherine Don</a>
48 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
49 */
50
51public abstract class AbstractGS2DocumentRetrieve
52 extends AbstractDocumentRetrieve {
53
54 protected static final String INDEX_STEM_ELEM = "indexStem";
55
56 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
57 protected String index_stem = null;
58
59 protected GDBMWrapper gdbm_src = null;
60
61
/**
 * Creates the GDBM wrapper used for database lookups and installs a GS2
 * macro resolver that is given that same wrapper.
 */
protected AbstractGS2DocumentRetrieve()
{
    GDBMWrapper wrapper = new GDBMWrapper();
    this.gdbm_src = wrapper;
    this.macro_resolver = new GS2MacroResolver(wrapper);
}
68
69 public void cleanUp() {
70 super.cleanUp();
71 this.gdbm_src.closeDatabase();
72 }
73 /** configure this service */
74 public boolean configure(Element info, Element extra_info)
75 {
76
77 System.out.println("Configuring AbstractGS2DocumentRetrieve...");
78 //this.config_info = info;
79
80 // Open GDBM database for querying
81 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
82 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
83 System.err.println("AbstractGS2DocumentRetrieve Error: Could not open GDBM database!");
84 return false;
85 }
86
87 // the index stem is either specified in the config file or is the collection name
88 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
89 if (index_stem_elem != null) {
90 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
91 }
92 if (this.index_stem == null || this.index_stem.equals("")) {
93 System.err.println("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
94 this.index_stem = this.cluster_name;
95 }
96
97
98 return super.configure(info, extra_info);
99
100 }
101
102 /** if id ends in .fc, .pc etc, then translate it to the correct id */
103 protected String translateId(String node_id) {
104 return this.gdbm_src.translateOID(node_id);
105 }
106
107 /** if an id is not a greenstone id (an external id) then translate
108 it to a greenstone one*/
109 protected String translateExternalId(String node_id){
110 return this.gdbm_src.externalId2OID(node_id);
111 }
112
113 /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
114 protected String getRootId(String node_id) {
115 return OID.getTop(node_id);
116 }
117 /** returns a list of the child ids in order, null if no children */
118 protected ArrayList getChildrenIds(String node_id) {
119 DBInfo info = this.gdbm_src.getInfo(node_id);
120 if (info == null) {
121 return null;
122 }
123
124 String contains = info.getInfo("contains");
125 if (contains.equals("")) {
126 return null;
127 }
128 ArrayList children = new ArrayList();
129 StringTokenizer st = new StringTokenizer(contains, ";");
130 while (st.hasMoreTokens()) {
131 String child_id = st.nextToken().replaceAll("\"", node_id);
132 children.add(child_id);
133 }
134 return children;
135
136 }
137 /** returns the node id of the parent node, null if no parent */
138 protected String getParentId(String node_id){
139 String parent = OID.getParent(node_id);
140 if (parent.equals(node_id)) {
141 return null;
142 }
143 return parent;
144 }
145
146 /** get the metadata for the classifier node node_id
147 * returns a metadataList element:
148 * <metadataList><metadata name="xxx">value</metadata></metadataList>
149 */
150 // assumes only one value per metadata
151 protected Element getMetadataList(String node_id, boolean all_metadata,
152 ArrayList metadata_names)
153 throws GSException {
154 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
155 DBInfo info = this.gdbm_src.getInfo(node_id);
156 if (info == null) {
157 return null;
158 }
159 String lang = "en"; // why do we need this??
160 if (all_metadata) {
161 // return everything out of the database
162 Set keys = info.getKeys();
163 Iterator it = keys.iterator();
164 while(it.hasNext()) {
165 String key = (String)it.next();
166 String value = info.getInfo(key);
167 GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(value, lang, MacroResolver.SCOPE_META, node_id));
168 }
169
170 } else {
171 for (int i=0; i<metadata_names.size(); i++) {
172 String meta_name = (String) metadata_names.get(i);
173 String value = getMetadata(node_id, info, meta_name, lang);
174 GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
175 }
176 }
177 return metadata_list;
178 }
179
180 /** returns the structural information asked for.
181 * info_type may be one of
182 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
183 */
184 protected String getStructureInfo(String doc_id, String info_type) {
185 String value="";
186 if (info_type.equals(INFO_NUM_SIBS)) {
187 String parent_id = OID.getParent(doc_id);
188 if (parent_id.equals(doc_id)) {
189 value="0";
190 } else {
191 value = String.valueOf(getNumChildren(parent_id));
192 }
193 return value;
194 }
195
196 if (info_type.equals(INFO_NUM_CHILDREN)) {
197 return String.valueOf(getNumChildren(doc_id));
198 }
199
200
201 if (info_type.equals(INFO_SIB_POS)) {
202 String parent_id = OID.getParent(doc_id);
203 if (parent_id.equals(doc_id)) {
204 return "-1";
205 }
206
207 DBInfo info = this.gdbm_src.getInfo(parent_id);
208 if (info==null) {
209 return "-1";
210 }
211
212 String contains = info.getInfo("contains");
213 contains = contains.replaceAll("\"", parent_id);
214 String [] children = contains.split(";");
215 for (int i=0;i<children.length;i++) {
216 String child_id = children[i];
217 if (child_id.equals(doc_id)) {
218 return String.valueOf(i+1); // make it from 1 to length
219
220 }
221 }
222
223 return "-1";
224 } else {
225 return null;
226 }
227
228 }
229
230 protected int getNumChildren(String node_id) {
231 DBInfo info = this.gdbm_src.getInfo(node_id);
232 if (info == null) {
233 return 0;
234 }
235 String contains = info.getInfo("contains");
236 if (contains.equals("")) {
237 return 0;
238 }
239 String [] children = contains.split(";");
240 return children.length;
241 }
242
243 /** returns the document type of the doc that the specified node
244 belongs to. should be one of
245 GSXML.DOC_TYPE_SIMPLE,
246 GSXML.DOC_TYPE_PAGED,
247 GSXML.DOC_TYPE_HIERARCHY
248 */
249 protected String getDocType(String node_id) {
250 DBInfo info = this.gdbm_src.getInfo(node_id);
251 if (info == null) {
252 return GSXML.DOC_TYPE_SIMPLE;
253 }
254 String doc_type = info.getInfo("doctype");
255 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
256 return doc_type;
257 }
258
259 String top_id = OID.getTop(node_id);
260 boolean is_top = (top_id.equals(node_id) ? true : false);
261
262 String children = info.getInfo("contains");
263 boolean is_leaf = (children.equals("") ? true : false);
264
265 if (is_top && is_leaf) { // a single section document
266 return GSXML.DOC_TYPE_SIMPLE;
267 }
268
269 // now we just check the top node
270 if (!is_top) { // we need to look at the top info
271 info = this.gdbm_src.getInfo(top_id);
272 }
273 if (info == null) {
274 return GSXML.DOC_TYPE_HIERARCHY;
275 }
276
277 String childtype = info.getInfo("childtype");
278 if (childtype.equals("Paged")) {
279 return GSXML.DOC_TYPE_PAGED;
280 }
281 return GSXML.DOC_TYPE_HIERARCHY;
282 }
283
284 /** returns the content of a node
285 * should return a nodeContent element:
286 * <nodeContent>text content or other elements</nodeContent>
287 */
288 abstract protected Element getNodeContent(String doc_id) throws GSException;
289
290 protected String getMetadata(String node_id, DBInfo info,
291 String metadata, String lang) {
292 boolean multiple = false;
293 String relation = "";
294 String separator = ", ";
295 int pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
296 if (pos ==-1) {
297 // just a plain meta entry eg dc.Title
298 return macro_resolver.resolve((String)info.getInfo(metadata), lang, MacroResolver.SCOPE_META, node_id);
299 }
300
301 String temp = metadata.substring(0, pos);
302 metadata = metadata.substring(pos+1);
303 // check for all on the front
304 if (temp.equals("all")) {
305 multiple=true;
306 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
307 if (pos ==-1) {
308 temp = "";
309 } else {
310 temp = metadata.substring(0, pos);
311 metadata = metadata.substring(pos+1);
312 }
313 }
314
315 // now check for relational info
316 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
317 relation = temp;
318 pos = metadata.indexOf(GSConstants.META_RELATION_SEP);
319 if (pos == -1) {
320 temp = "";
321 } else {
322 temp = metadata.substring(0, pos);
323 metadata = metadata.substring(pos+1);
324 }
325 }
326
327 // now look for separator info
328 if (temp.startsWith(GSConstants.META_SEPARATOR_SEP) && temp.endsWith(GSConstants.META_SEPARATOR_SEP)) {
329 separator = temp.substring(1, temp.length()-1);
330
331 }
332
333 String relation_id = node_id;
334 if (relation.equals("parent") || relation.equals("ancestors")) {
335 relation_id = OID.getParent(node_id);
336 // parent or ancestor does not include self
337 if (relation_id.equals(node_id)){
338 return "";
339 }
340 } else if (relation.equals("root")) {
341 relation_id = OID.getTop(node_id);
342 }
343
344 // now we either have a single node, or we have ancestors
345 DBInfo relation_info;
346 if (relation_id.equals(node_id)) {
347 relation_info = info;
348 } else {
349 relation_info = this.gdbm_src.getInfo(relation_id);
350 }
351 if (relation_info == null) {
352 return "";
353 }
354
355 StringBuffer result = new StringBuffer();
356
357 if (!multiple) {
358 result.append(this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
359 } else {
360 // we have multiple meta
361 Vector values = relation_info.getMultiInfo(metadata);
362 if (values != null) {
363 boolean first = true;
364 for (int i=0; i<values.size(); i++) {
365 if (first) {
366 first = false;
367 } else {
368 result.append(separator);
369 }
370 result.append(this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
371 }
372 }
373 }
374 // if not ancestors, then this is all we do
375 if (!relation.equals("ancestors")) {
376 return result.toString();
377 }
378
379 // now do the ancestors
380 String current_id = relation_id;
381 relation_id = OID.getParent(current_id);
382 while (!relation_id.equals(current_id)) {
383 relation_info = this.gdbm_src.getInfo(relation_id);
384 if (relation_info == null) return result.toString();
385 if (!multiple) {
386 result.insert(0, separator);
387 result.insert(0, this.macro_resolver.resolve(relation_info.getInfo(metadata), lang, MacroResolver.SCOPE_META, relation_id));
388 } else {
389 Vector values = relation_info.getMultiInfo(metadata);
390 if (values != null) {
391 for (int i=values.size()-1; i>=0; i--) {
392 result.insert(0, separator);
393 result.insert(0, this.macro_resolver.resolve((String)values.elementAt(i), lang, MacroResolver.SCOPE_META, relation_id));
394 }
395 }
396
397 }
398 current_id = relation_id;
399 relation_id = OID.getParent(current_id);
400 }
401 return result.toString();
402 }
403
404
405 /** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
406 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
407 {
408 DBInfo info = null;
409 if (doc_content.indexOf("_httpdocimg_")!=-1) {
410 String top_doc_id = OID.getTop(doc_id);
411 info = this.gdbm_src.getInfo(top_doc_id);
412 if (info == null) {
413 // perhaps we had per.iods in the ids - just try the current id
414 top_doc_id = doc_id;
415 info = this.gdbm_src.getInfo(top_doc_id);
416 }
417 if (info != null) {
418 String archivedir = info.getInfo("archivedir");
419 String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
420
421 // Resolve all "_httpdocimg_"s
422 doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
423 }
424 }
425 // resolve any collection specific macros
426 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
427 return doc_content;
428 }
429
430 protected Element getInfo(String doc_id, String info_type) {
431
432 String value="";
433 if (info_type.equals(INFO_NUM_SIBS)) {
434 String parent_id = OID.getParent(doc_id);
435 if (parent_id.equals(doc_id)) {
436 value="0";
437 } else {
438 value = String.valueOf(getNumChildren(parent_id));
439 }
440 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
441 value = String.valueOf(getNumChildren(doc_id));
442 } else if (info_type.equals(INFO_SIB_POS)) {
443 String parent_id = OID.getParent(doc_id);
444 if (parent_id.equals(doc_id)) {
445 value="-1";
446 } else {
447 DBInfo info = this.gdbm_src.getInfo(parent_id);
448 if (info==null) {
449 value ="-1";
450 } else {
451 String contains = info.getInfo("contains");
452 contains = contains.replaceAll("\"", parent_id);
453 String [] children = contains.split(";");
454 for (int i=0;i<children.length;i++) {
455 String child_id = children[i];
456 if (child_id.equals(doc_id)) {
457 value = String.valueOf(i+1); // make it from 1 to length
458 break;
459 }
460 }
461 }
462 }
463 } else {
464 return null;
465 }
466 Element info_elem = this.doc.createElement("info");
467 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
468 info_elem.setAttribute(GSXML.VALUE_ATT, value);
469 return info_elem;
470 }
471
472}
Note: See TracBrowser for help on using the repository browser.