[8959] | 1 | /*
|
---|
| 2 | * AbstractGS2DocumentRetrieve.java
|
---|
| 3 | * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
|
---|
| 4 | *
|
---|
| 5 | * This program is free software; you can redistribute it and/or modify
|
---|
| 6 | * it under the terms of the GNU General Public License as published by
|
---|
| 7 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 8 | * (at your option) any later version.
|
---|
| 9 | *
|
---|
| 10 | * This program is distributed in the hope that it will be useful,
|
---|
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 13 | * GNU General Public License for more details.
|
---|
| 14 | *
|
---|
| 15 | * You should have received a copy of the GNU General Public License
|
---|
| 16 | * along with this program; if not, write to the Free Software
|
---|
| 17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 18 | */
|
---|
| 19 | package org.greenstone.gsdl3.service;
|
---|
| 20 |
|
---|
| 21 | // Greenstone classes
|
---|
[9874] | 22 | import org.greenstone.gsdl3.core.GSException;
|
---|
[26046] | 23 | import org.greenstone.gsdl3.util.BasicDocumentDatabase;
|
---|
[8959] | 24 | import org.greenstone.gsdl3.util.GSXML;
|
---|
| 25 | import org.greenstone.gsdl3.util.GSFile;
|
---|
| 26 | import org.greenstone.gsdl3.util.OID;
|
---|
| 27 | import org.greenstone.gsdl3.util.MacroResolver;
|
---|
| 28 | import org.greenstone.gsdl3.util.GS2MacroResolver;
|
---|
| 29 | import org.greenstone.gsdl3.util.GSConstants;
|
---|
[15326] | 30 | import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
|
---|
[9874] | 31 | import org.greenstone.gsdl3.util.DBInfo;
|
---|
[8959] | 32 | // XML classes
|
---|
| 33 | import org.w3c.dom.Document;
|
---|
[25427] | 34 | import org.w3c.dom.Element;
|
---|
[8959] | 35 | import org.w3c.dom.NodeList;
|
---|
| 36 |
|
---|
| 37 | // General Java classes
|
---|
| 38 | import java.io.File;
|
---|
| 39 | import java.util.StringTokenizer;
|
---|
| 40 | import java.util.Vector;
|
---|
| 41 | import java.util.Set;
|
---|
| 42 | import java.util.Iterator;
|
---|
| 43 | import java.util.ArrayList;
|
---|
| 44 |
|
---|
[13124] | 45 | import org.apache.log4j.*;
|
---|
| 46 |
|
---|
[23792] | 47 | // Apache Commons
|
---|
| 48 | import org.apache.commons.lang3.*;
|
---|
| 49 |
|
---|
[25427] | 50 | /**
|
---|
| 51 | * Implements the generic retrieval and classifier services for GS2 collections.
|
---|
| 52 | *
|
---|
[21663] | 53 | * @author Katherine Don
|
---|
| 54 | * @author Michael Dewsnip
|
---|
[8959] | 55 | */
|
---|
| 56 |
|
---|
[25427] | 57 | public abstract class AbstractGS2DocumentRetrieve extends AbstractDocumentRetrieve
|
---|
| 58 | {
|
---|
[8959] | 59 |
|
---|
[25427] | 60 | static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2DocumentRetrieve.class.getName());
|
---|
[13124] | 61 |
|
---|
[25427] | 62 | // protected static final String EXTLINK_PARAM = "ext"; here or in base??
|
---|
| 63 | protected String index_stem = null;
|
---|
[9000] | 64 |
|
---|
[25427] | 65 | protected SimpleCollectionDatabase coll_db = null;
|
---|
[26046] | 66 | BasicDocumentDatabase gs_doc_db = null;
|
---|
[25427] | 67 | /** constructor */
|
---|
| 68 | protected AbstractGS2DocumentRetrieve()
|
---|
| 69 | {
|
---|
| 70 | this.macro_resolver = new GS2MacroResolver();
|
---|
[10093] | 71 | }
|
---|
[10651] | 72 |
|
---|
[25427] | 73 | public void cleanUp()
|
---|
| 74 | {
|
---|
| 75 | super.cleanUp();
|
---|
| 76 | this.coll_db.closeDatabase();
|
---|
[26046] | 77 | this.gs_doc_db.cleanUp();
|
---|
[15326] | 78 | }
|
---|
[10651] | 79 |
|
---|
[25427] | 80 | /** configure this service */
|
---|
| 81 | public boolean configure(Element info, Element extra_info)
|
---|
| 82 | {
|
---|
| 83 | if (!super.configure(info, extra_info))
|
---|
| 84 | {
|
---|
| 85 | return false;
|
---|
| 86 | }
|
---|
[15770] | 87 |
|
---|
[25427] | 88 | logger.info("Configuring AbstractGS2DocumentRetrieve...");
|
---|
| 89 | //this.config_info = info;
|
---|
[8959] | 90 |
|
---|
[25427] | 91 | // the index stem is either specified in the config file or is the collection name
|
---|
| 92 | Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
|
---|
| 93 | if (index_stem_elem != null)
|
---|
| 94 | {
|
---|
| 95 | this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
|
---|
| 96 | }
|
---|
| 97 | if (this.index_stem == null || this.index_stem.equals(""))
|
---|
| 98 | {
|
---|
| 99 | logger.error("AbstractGS2DocumentRetrieve.configure(): indexStem element not found, stem will default to collection name");
|
---|
| 100 | this.index_stem = this.cluster_name;
|
---|
| 101 | }
|
---|
[8959] | 102 |
|
---|
[25427] | 103 | // find out what kind of database we have
|
---|
| 104 | Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
|
---|
| 105 | String database_type = null;
|
---|
| 106 | if (database_type_elem != null)
|
---|
| 107 | {
|
---|
| 108 | database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
|
---|
| 109 | }
|
---|
| 110 | if (database_type == null || database_type.equals(""))
|
---|
| 111 | {
|
---|
| 112 | database_type = "gdbm"; // the default
|
---|
| 113 | }
|
---|
| 114 | coll_db = new SimpleCollectionDatabase(database_type);
|
---|
| 115 | if (!coll_db.databaseOK())
|
---|
| 116 | {
|
---|
| 117 | logger.error("Couldn't create the collection database of type " + database_type);
|
---|
| 118 | return false;
|
---|
| 119 | }
|
---|
| 120 |
|
---|
| 121 | // Open database for querying
|
---|
| 122 | String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, this.index_stem, database_type);
|
---|
| 123 | if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ))
|
---|
| 124 | {
|
---|
| 125 | logger.error("Could not open collection database!");
|
---|
| 126 | return false;
|
---|
| 127 | }
|
---|
| 128 |
|
---|
[28966] | 129 | gs_doc_db = new BasicDocumentDatabase(database_type, this.site_home, this.cluster_name, this.index_stem);
|
---|
[26046] | 130 | if (!gs_doc_db.isValid())
|
---|
| 131 | {
|
---|
| 132 | logger.error("Failed to open Document Database.");
|
---|
| 133 | return false;
|
---|
| 134 | }
|
---|
| 135 | this.gs_doc = gs_doc_db;
|
---|
| 136 |
|
---|
[25427] | 137 | // we need to set the database for our GS2 macro resolver
|
---|
| 138 | GS2MacroResolver gs2_macro_resolver = (GS2MacroResolver) this.macro_resolver;
|
---|
| 139 | gs2_macro_resolver.setDB(this.coll_db);
|
---|
[26198] | 140 | // set the class loader in case we have collection specific properties files
|
---|
| 141 | gs2_macro_resolver.setClassLoader(this.class_loader);
|
---|
[25427] | 142 | return true;
|
---|
[8959] | 143 | }
|
---|
| 144 |
|
---|
[25427] | 145 | /** if id ends in .fc, .pc etc, then translate it to the correct id */
|
---|
| 146 | protected String translateId(String node_id)
|
---|
| 147 | {
|
---|
| 148 | return OID.translateOID(this.coll_db, node_id); //return this.coll_db.translateOID(node_id);
|
---|
[8959] | 149 | }
|
---|
[25427] | 150 |
|
---|
| 151 | /**
|
---|
| 152 | * if an id is not a greenstone id (an external id) then translate it to a
|
---|
| 153 | * greenstone one
|
---|
| 154 | */
|
---|
| 155 | protected String translateExternalId(String node_id)
|
---|
| 156 | {
|
---|
| 157 | return this.coll_db.externalId2OID(node_id);
|
---|
[8959] | 158 | }
|
---|
| 159 |
|
---|
[25427] | 160 | /**
|
---|
| 161 | * returns the id of the root node of the document containing node node_id.
|
---|
| 162 | * . may be the same as node_id
|
---|
| 163 | */
|
---|
| 164 | protected String getRootId(String node_id)
|
---|
| 165 | {
|
---|
[26046] | 166 | return this.gs_doc.getRootId(node_id);
|
---|
[8959] | 167 | }
|
---|
| 168 |
|
---|
| 169 |
|
---|
[25427] | 170 |
|
---|
| 171 | /**
|
---|
| 172 | * get the metadata for the classifier node node_id returns a metadataList
|
---|
| 173 | * element: <metadataList><metadata
|
---|
| 174 | * name="xxx">value</metadata></metadataList>
|
---|
| 175 | */
|
---|
[28966] | 176 | protected Element getMetadataList(Document doc, String node_id, boolean all_metadata, ArrayList<String> metadata_names, String lang) throws GSException
|
---|
[25427] | 177 | {
|
---|
[28966] | 178 | Element metadata_list = doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
|
---|
[25427] | 179 | DBInfo info = this.coll_db.getInfo(node_id);
|
---|
| 180 | if (info == null)
|
---|
| 181 | {
|
---|
| 182 | return null;
|
---|
| 183 | }
|
---|
[26198] | 184 |
|
---|
[26090] | 185 | if (all_metadata) // this will get all metadata for current node
|
---|
[25427] | 186 | {
|
---|
| 187 | // return everything out of the database
|
---|
[25635] | 188 | Set<String> keys = info.getKeys();
|
---|
| 189 | Iterator<String> it = keys.iterator();
|
---|
[25427] | 190 | while (it.hasNext())
|
---|
| 191 | {
|
---|
[25635] | 192 | String key = it.next();
|
---|
[25427] | 193 | //String value = info.getInfo(key);
|
---|
[25635] | 194 | Vector<String> values = info.getMultiInfo(key);
|
---|
[25427] | 195 | for (int i = 0; i < values.size(); i++)
|
---|
| 196 | {
|
---|
[28966] | 197 | GSXML.addMetadata(metadata_list, key, this.macro_resolver.resolve(values.elementAt(i), lang, MacroResolver.SCOPE_META, node_id));
|
---|
[25427] | 198 | }
|
---|
| 199 | }
|
---|
[8959] | 200 |
|
---|
[25427] | 201 | }
|
---|
[26090] | 202 | // now we go through the list of names. If we have specified
|
---|
| 203 | // all_metadata, then here we only get the ones like
|
---|
| 204 | // parent_Title, that are not the current node.
|
---|
| 205 | for (int i = 0; i < metadata_names.size(); i++)
|
---|
| 206 | {
|
---|
| 207 | String meta_name = metadata_names.get(i);
|
---|
| 208 |
|
---|
| 209 | if (!all_metadata || meta_name.indexOf(GSConstants.META_RELATION_SEP)!=-1) {
|
---|
| 210 | Vector <String> values = getMetadata(node_id, info, meta_name, lang);
|
---|
| 211 | if (values != null) {
|
---|
| 212 | for (int j = 0; j < values.size(); j++)
|
---|
[25427] | 213 | {
|
---|
[26090] | 214 | // some of these may be parent/ancestor. does resolve need a different id???
|
---|
[28966] | 215 | GSXML.addMetadata(metadata_list, meta_name, this.macro_resolver.resolve(values.elementAt(j), lang, MacroResolver.SCOPE_META, node_id));
|
---|
[25427] | 216 | }
|
---|
[26090] | 217 | }
|
---|
| 218 | }
|
---|
| 219 | }
|
---|
| 220 |
|
---|
[25427] | 221 | return metadata_list;
|
---|
[8959] | 222 | }
|
---|
| 223 |
|
---|
[26090] | 224 | protected Vector<String> getMetadata(String node_id, DBInfo info, String metadata, String lang) {
|
---|
[8959] | 225 |
|
---|
[26090] | 226 | DBInfo current_info = info;
|
---|
| 227 |
|
---|
| 228 | int index = metadata.indexOf(GSConstants.META_RELATION_SEP);
|
---|
| 229 | if (index == -1) {
|
---|
| 230 | // metadata is for this node
|
---|
[29523] | 231 | return info.getMultiInfo(metadata);
|
---|
[26090] | 232 | }
|
---|
| 233 | // we need to get metadata for one or more different nodes
|
---|
| 234 | String relation = metadata.substring(0, index);
|
---|
| 235 | String relation_id="";
|
---|
| 236 | metadata = metadata.substring(index + 1);
|
---|
[29523] | 237 | if (relation.equals("root")) {
|
---|
| 238 | relation_id = OID.getTop(node_id);
|
---|
| 239 | if (relation_id.equals(node_id)) {
|
---|
| 240 | // use the current node info
|
---|
| 241 | return info.getMultiInfo(metadata);
|
---|
| 242 | } else {
|
---|
| 243 | return getMetaValuesForOID(relation_id, metadata);
|
---|
| 244 | }
|
---|
| 245 | }
|
---|
| 246 | if (relation.equals("parent")) {
|
---|
[26090] | 247 | relation_id = OID.getParent(node_id);
|
---|
| 248 | if (relation_id.equals(node_id)) {
|
---|
[29523] | 249 | // no parent
|
---|
[26090] | 250 | return null;
|
---|
| 251 | }
|
---|
[29523] | 252 | return getMetaValuesForOID(relation_id, metadata);
|
---|
[26090] | 253 | }
|
---|
| 254 |
|
---|
[29523] | 255 | if (relation.equals("ancestors")) {
|
---|
| 256 | if (OID.isTop(node_id)) {
|
---|
| 257 | return null;
|
---|
| 258 | }
|
---|
| 259 | String current_id = node_id;
|
---|
| 260 | relation_id = OID.getParent(current_id);
|
---|
| 261 | Vector<String> values = new Vector<String>();
|
---|
| 262 | while (!relation_id.equals(current_id)) {
|
---|
| 263 |
|
---|
| 264 | Vector<String> more_values = getMetaValuesForOID(relation_id, metadata);
|
---|
| 265 | if (more_values != null) {
|
---|
| 266 | values.addAll(0, more_values);
|
---|
| 267 | }
|
---|
| 268 | current_id = relation_id;
|
---|
| 269 | relation_id = OID.getParent(current_id);
|
---|
| 270 | }
|
---|
| 271 | return values;
|
---|
[26090] | 272 | }
|
---|
[29523] | 273 | if (relation.equals("siblings")) {
|
---|
| 274 | String parent_id = OID.getParent(node_id);
|
---|
| 275 | if (parent_id.equals(node_id)) {
|
---|
| 276 | // no parent, therefore no siblings
|
---|
[26090] | 277 | return null;
|
---|
| 278 | }
|
---|
[29523] | 279 | // siblings is the same as asking for children of the parent
|
---|
| 280 | node_id = parent_id;
|
---|
| 281 | relation = "children";
|
---|
| 282 | current_info = this.coll_db.getInfo(parent_id);
|
---|
| 283 | if (current_info == null) {
|
---|
| 284 | return null;
|
---|
| 285 | }
|
---|
| 286 | }
|
---|
| 287 | if (relation.equals("children")) {
|
---|
| 288 | Vector<String> values = new Vector<String>();
|
---|
| 289 | String contains = current_info.getInfo("contains");
|
---|
| 290 | contains = StringUtils.replace(contains, "\"", node_id);
|
---|
| 291 | String[] children = contains.split(";");
|
---|
| 292 | for (int i = 0; i < children.length; i++) {
|
---|
| 293 |
|
---|
| 294 | String child_id = children[i];
|
---|
| 295 | Vector<String> more_values = getMetaValuesForOID(child_id, metadata);
|
---|
| 296 | if (more_values != null) {
|
---|
| 297 | values.addAll(more_values);
|
---|
| 298 | }
|
---|
| 299 | }
|
---|
[26090] | 300 | return values;
|
---|
[29523] | 301 | }
|
---|
| 302 | if (relation.equals("descendents")) {
|
---|
| 303 | return null;
|
---|
[26090] | 304 | }
|
---|
[29523] | 305 | // unknown relation
|
---|
| 306 | logger.error("asked for relation "+relation+" and don't understand it.");
|
---|
| 307 | return null;
|
---|
| 308 | }
|
---|
| 309 |
|
---|
| 310 | // } else {
|
---|
| 311 | // if (relation.equals("parent") || relation.equals("ancestors")) {
|
---|
| 312 | // relation_id = OID.getParent(node_id);
|
---|
| 313 | // if (relation_id.equals(node_id)) {
|
---|
| 314 | // return null;
|
---|
| 315 | // }
|
---|
| 316 | // } else if (relation.equals("root")) {
|
---|
| 317 | // relation_id = OID.getTop(node_id);
|
---|
| 318 | // }
|
---|
[26090] | 319 |
|
---|
[29523] | 320 | // DBInfo relation_info;
|
---|
| 321 | // if (relation_id.equals(node_id)) {
|
---|
| 322 | // relation_info = info;
|
---|
| 323 | // } else {
|
---|
| 324 | // relation_info = this.coll_db.getInfo(relation_id);
|
---|
| 325 | // }
|
---|
| 326 | // if (relation_info == null)
|
---|
| 327 | // {
|
---|
| 328 | // return null;
|
---|
| 329 | // }
|
---|
[26090] | 330 |
|
---|
[29523] | 331 | // Vector<String> values = relation_info.getMultiInfo(metadata);
|
---|
| 332 | // // do resolving
|
---|
| 333 | // if (!relation.equals("ancestors")){
|
---|
| 334 | // return values;
|
---|
| 335 | // }
|
---|
| 336 |
|
---|
| 337 | // // ancestors: go up the chain
|
---|
| 338 |
|
---|
| 339 | // String current_id = relation_id;
|
---|
| 340 | // relation_id = OID.getParent(current_id);
|
---|
| 341 | // while (!relation_id.equals(current_id))
|
---|
| 342 | // {
|
---|
| 343 | // relation_info = this.coll_db.getInfo(relation_id);
|
---|
| 344 | // if (relation_info == null)
|
---|
| 345 | // return values;
|
---|
[26090] | 346 |
|
---|
[29523] | 347 | // Vector<String> more_values = relation_info.getMultiInfo(metadata);
|
---|
| 348 | // if (more_values != null)
|
---|
| 349 | // {
|
---|
| 350 | // values.addAll(0, more_values);
|
---|
| 351 | // }
|
---|
[26090] | 352 |
|
---|
| 353 |
|
---|
[29523] | 354 | // current_id = relation_id;
|
---|
| 355 | // relation_id = OID.getParent(current_id);
|
---|
| 356 | // }
|
---|
| 357 | // return values; // for now
|
---|
| 358 | // }
|
---|
| 359 |
|
---|
| 360 | protected Vector<String> getMetaValuesForOID(String oid, String metadata) {
|
---|
| 361 | DBInfo info = this.coll_db.getInfo(oid);
|
---|
| 362 | if (info == null) {
|
---|
| 363 | return null;
|
---|
[26090] | 364 | }
|
---|
[29523] | 365 |
|
---|
| 366 | Vector<String> values = info.getMultiInfo(metadata);
|
---|
| 367 | // lets look through the values and look for [xxx] things. We need to look up metadata for them.
|
---|
| 368 | if (values == null) { return values; }
|
---|
| 369 |
|
---|
| 370 | for (int j = 0; j < values.size(); j++) {
|
---|
| 371 | String val = values.elementAt(j);
|
---|
| 372 | if (val.contains("[")) {
|
---|
| 373 | // look for metadata refs
|
---|
| 374 | String [] metas = StringUtils.substringsBetween(val, "[", "]");
|
---|
| 375 | for (int i=0; i<metas.length; i++) {
|
---|
| 376 | String meta = metas[i];
|
---|
| 377 | String meta_val = info.getInfo(meta);
|
---|
| 378 | if (!meta_val.equals("")) {
|
---|
| 379 | val = StringUtils.replace(val,"["+meta+"]",meta_val);
|
---|
| 380 | }
|
---|
| 381 | }
|
---|
| 382 | values.set(j,val);
|
---|
| 383 | }
|
---|
| 384 | }
|
---|
| 385 | return values;
|
---|
| 386 | }
|
---|
[25427] | 387 | protected int getNumChildren(String node_id)
|
---|
| 388 | {
|
---|
[26046] | 389 | return this.gs_doc.getNumChildren(node_id);
|
---|
[8959] | 390 | }
|
---|
| 391 |
|
---|
| 392 |
|
---|
[25427] | 393 | /**
|
---|
| 394 | * returns the content of a node should return a nodeContent element:
|
---|
| 395 | * <nodeContent>text content or other elements</nodeContent>
|
---|
| 396 | */
|
---|
[28966] | 397 | abstract protected Element getNodeContent(Document doc, String doc_id, String lang) throws GSException;
|
---|
[25427] | 398 |
|
---|
[8959] | 399 |
|
---|
[25427] | 400 | /**
|
---|
| 401 | * needs to get info from collection database - if the calling code gets it
|
---|
| 402 | * already it may pay to pass it in instead
|
---|
| 403 | */
|
---|
| 404 | protected String resolveTextMacros(String doc_content, String doc_id, String lang)
|
---|
| 405 | {
|
---|
| 406 | // resolve any collection specific macros
|
---|
| 407 | doc_content = macro_resolver.resolve(doc_content, lang, MacroResolver.SCOPE_TEXT, doc_id);
|
---|
| 408 | return doc_content;
|
---|
| 409 | }
|
---|
[14527] | 410 |
|
---|
[25427] | 411 |
|
---|
| 412 |
|
---|
| 413 | }
|
---|