source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2DocumentRetrieve.java@ 32453

Last change on this file since 32453 was 30891, checked in by kjdon, 8 years ago

Removed some commented-out code. When looking up metadata, if the name contains an underscore we take the first part off and assume that it is a relation tag (root, parent, etc.). But if we have gone through our list of relations and it doesn't match, we put the first part back on and look up the full name, as the user may have underscores in their metadata names.

  • Property svn:keywords set to Author Date Id Revision
File size: 11.5 KB
Line 
1/*
2 * AbstractGS2DocumentRetrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.BasicDocumentDatabase;
24import org.greenstone.gsdl3.util.GSXML;
25import org.greenstone.gsdl3.util.GSFile;
26import org.greenstone.gsdl3.util.OID;
27import org.greenstone.gsdl3.util.MacroResolver;
28import org.greenstone.gsdl3.util.GS2MacroResolver;
29import org.greenstone.gsdl3.util.GSConstants;
30import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
31import org.greenstone.gsdl3.util.DBInfo;
32// XML classes
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.NodeList;
36
37// General Java classes
38import java.io.File;
39import java.util.StringTokenizer;
40import java.util.Vector;
41import java.util.Set;
42import java.util.Iterator;
43import java.util.ArrayList;
44
45import org.apache.log4j.*;
46
47// Apache Commons
48import org.apache.commons.lang3.*;
49
50/**
51 * Implements the generic retrieval and classifier services for GS2 collections.
52 *
53 */
54
55public abstract class AbstractGS2DocumentRetrieve extends AbstractDocumentRetrieve
56{
57
58 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
59
60 // protected static final String EXTLINK_PARAM = "ext"; here or in base??
61 protected String index_stem = null;
62
63 protected SimpleCollectionDatabase coll_db = null;
64 BasicDocumentDatabase gs_doc_db = null;
/**
 * Creates the service and installs a GS2MacroResolver as its macro resolver.
 * The resolver is given its database and class loader later, in configure().
 */
protected AbstractGS2DocumentRetrieve()
{
    GS2MacroResolver resolver = new GS2MacroResolver();
    this.macro_resolver = resolver;
}
70
71 public void cleanUp()
72 {
73 super.cleanUp();
74 this.coll_db.closeDatabase();
75 this.gs_doc_db.cleanUp();
76 }
77
78 /** configure this service */
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info))
82 {
83 return false;
84 }
85
86 logger.info("Configuring AbstractGS2DocumentRetrieve...");
87 //this.config_info = info;
88
89 // the index stem is either specified in the config file or is the collection name
90 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
91 if (index_stem_elem != null)
92 {
93 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
94 }
95 if (this.index_stem == null || this.index_stem.equals(""))
96 {
97 logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
98 this.index_stem = this.cluster_name;
99 }
100
101 // find out what kind of database we have
102 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
103 String database_type = null;
104 if (database_type_elem != null)
105 {
106 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
107 }
108 if (database_type == null || database_type.equals(""))
109 {
110 database_type = "gdbm"; // the default
111 }
112 coll_db = new SimpleCollectionDatabase(database_type);
113 if (!coll_db.databaseOK())
114 {
115 logger.error("Couldn't create the collection database of type " + database_type);
116 return false;
117 }
118
119 // Open database for querying
120 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
121 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ))
122 {
123 logger.error("Could not open collection database!");
124 return false;
125 }
126
127 gs_doc_db = new BasicDocumentDatabase(database_type, this.site_home, this.cluster_name, this.index_stem);
128 if (!gs_doc_db.isValid())
129 {
130 logger.error("Failed to open Document Database.");
131 return false;
132 }
133 this.gs_doc = gs_doc_db;
134
135 // we need to set the database for our GS2 macro resolver
136 GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver) this.macro_resolver;
137 gs2_macro_resolver.setDB(this.coll_db);
138 // set the class loader in case we have collection specific properties files
139 gs2_macro_resolver.setClassLoader(this.class_loader);
140 return true;
141 }
142
143 /** if id ends in .fc, .pc etc, then translate it to the correct id */
144 protected String translateId(String node_id)
145 {
146 return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
147 }
148
149 /**
150 * if an id is not a greenstone id (an external id) then translate it to a
151 * greenstone one
152 */
153 protected String translateExternalId(String node_id)
154 {
155 return this.coll_db.externalId2OID(node_id);
156 }
157
158 /**
159 * returns the id of the root node of the document containing node node_id.
160 * . may be the same as node_id
161 */
162 protected String getRootId(String node_id)
163 {
164 return this.gs_doc.getRootId(node_id);
165 }
166
167
168
169 /**
170 * get the metadata for the classifier node node_id returns a metadataList
171 * element: <metadataList><metadata
172 * name="xxx">value</metadata></metadataList>
173 */
174 protected Element getMetadataList(Document doc, String node_id, boolean all_metadata, ArrayList<String> metadata_names, String lang) throws GSException
175 {
176 Element metadata_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
177 DBInfo info = this.coll_db.getInfo(node_id);
178 if (info == null)
179 {
180 return null;
181 }
182
183 if (all_metadata) // this will get all metadata for current node
184 {
185 // return everything out of the database
186 Set<String> keys = info.getKeys();
187 Iterator<String> it = keys.iterator();
188 while (it.hasNext())
189 {
190 String key = it.next();
191 //String value = info.getInfo(key);
192 Vector<String> values = info.getMultiInfo(key);
193 for (int i = 0; i < values.size(); i++)
194 {
195 GSXML.addMetadata(metadata_list, key, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
196 }
197 }
198
199 }
200 // now we go through the list of names. If we have specified
201 // all_metadata, then here we only get the ones like
202 // parent_Title, that are not the current node.
203 for (int i = 0; i < metadata_names.size(); i++)
204 {
205 String meta_name = metadata_names.get(i);
206
207 if (!all_metadata || meta_name.indexOf(GSConstants.META_RELATION_SEP)!=-1) {
208 Vector <String> values = getMetadata(node_id, info, meta_name, lang);
209 if (values != null) {
210 for (int j = 0; j < values.size(); j++)
211 {
212 // some of these may be parent/ancestor. does resolve need a different id???
213 GSXML.addMetadata(metadata_list, meta_name, this.macro_resolver.resolve(values.elementAt(j), lang, MacroResolver.SCOPE_META, node_id));
214 }
215 }
216 }
217 }
218
219 return metadata_list;
220 }
221
222 protected Vector<String> getMetadata(String node_id, DBInfo info, String metadata, String lang) {
223
224 DBInfo current_info = info;
225
226 int index = metadata.indexOf(GSConstants.META_RELATION_SEP);
227 if (index == -1) {
228 // metadata is for this node
229 return info.getMultiInfo(metadata);
230 }
231 // we need to get metadata for one or more different nodes
232 // we have a relation root, parent, ancestors, siblings, children, descendents
233 String relation = metadata.substring(0, index);
234 String relation_id="";
235 metadata = metadata.substring(index + 1);
236 if (relation.equals("root")) {
237 relation_id = OID.getTop(node_id);
238 if (relation_id.equals(node_id)) {
239 // use the current node info
240 return info.getMultiInfo(metadata);
241 } else {
242 return getMetaValuesForOID(relation_id, metadata);
243 }
244 }
245 if (relation.equals("parent")) {
246 relation_id = OID.getParent(node_id);
247 if (relation_id.equals(node_id)) {
248 // no parent
249 return null;
250 }
251 return getMetaValuesForOID(relation_id, metadata);
252 }
253
254 if (relation.equals("ancestors")) {
255 if (OID.isTop(node_id)) {
256 return null;
257 }
258 String current_id = node_id;
259 relation_id = OID.getParent(current_id);
260 Vector<String> values = new Vector<String>();
261 while (!relation_id.equals(current_id)) {
262
263 Vector<String> more_values = getMetaValuesForOID(relation_id, metadata);
264 if (more_values != null) {
265 values.addAll(0, more_values);
266 }
267 current_id = relation_id;
268 relation_id = OID.getParent(current_id);
269 }
270 return values;
271 }
272 if (relation.equals("siblings")) {
273 String parent_id = OID.getParent(node_id);
274 if (parent_id.equals(node_id)) {
275 // no parent, therefore no siblings
276 return null;
277 }
278 // siblings is the same as asking for children of the parent
279 node_id = parent_id;
280 relation = "children";
281 current_info = this.coll_db.getInfo(parent_id);
282 if (current_info == null) {
283 return null;
284 }
285 }
286 if (relation.equals("children")) {
287 Vector<String> values = new Vector<String>();
288 String contains = current_info.getInfo("contains");
289 contains = StringUtils.replace(contains, "\"", node_id);
290 String[] children = contains.split(";");
291 for (int i = 0; i < children.length; i++) {
292
293 String child_id = children[i];
294 Vector<String> more_values = getMetaValuesForOID(child_id, metadata);
295 if (more_values != null) {
296 values.addAll(more_values);
297 }
298 }
299 return values;
300 }
301 if (relation.equals("descendents")) {
302 return null;
303 }
304 // unknown relation
305 logger.error("asked for relation "+relation+" and don't understand it, so getting the full metadata name "+relation+GSConstants.META_RELATION_SEP+metadata);
306 // we assume that maybe the metadata has an _ in the name, so get the original name
307 return info.getMultiInfo(relation+GSConstants.META_RELATION_SEP+metadata);
308
309 }
310
311
312 protected Vector<String> getMetaValuesForOID(String oid, String metadata) {
313 DBInfo info = this.coll_db.getInfo(oid);
314 if (info == null) {
315 return null;
316 }
317
318 Vector<String> values = info.getMultiInfo(metadata);
319 // lets look through the values and look for [xxx] things. We need to look up metadata for them.
320 if (values == null) { return values; }
321
322 for (int j = 0; j < values.size(); j++) {
323 String val = values.elementAt(j);
324 if (val.contains("[")) {
325 // look for metadata refs
326 String [] metas = StringUtils.substringsBetween(val, "[", "]");
327 for (int i=0; i<metas.length; i++) {
328 String meta = metas[i];
329 String meta_val = info.getInfo(meta);
330 if (!meta_val.equals("")) {
331 val = StringUtils.replace(val,"["+meta+"]",meta_val);
332 }
333 }
334 values.set(j,val);
335 }
336 }
337 return values;
338 }
339 protected int getNumChildren(String node_id)
340 {
341 return this.gs_doc.getNumChildren(node_id);
342 }
343
344
345 /**
346 * returns the content of a node should return a nodeContent element:
347 * <nodeContent>text content or other elements</nodeContent>
348 */
349 abstract protected Element getNodeContent(Document doc, String doc_id, String lang) throws GSException;
350
351
352 /**
353 * needs to get info from collection database - if the calling code gets it
354 * already it may pay to pass it in instead
355 */
356 protected String resolveTextMacros(String doc_content, String doc_id, String lang)
357 {
358 // resolve any collection specific macros
359 doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
360 return doc_content;
361 }
362
363
364
365}
Note: See TracBrowser for help on using the repository browser.