[8959] | 1 | /*
|
---|
| 2 | * GS2Browse.java
|
---|
| 3 | * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
|
---|
| 4 | *
|
---|
| 5 | * This program is free software; you can redistribute it and/or modify
|
---|
| 6 | * it under the terms of the GNU General Public License as published by
|
---|
| 7 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 8 | * (at your option) any later version.
|
---|
| 9 | *
|
---|
| 10 | * This program is distributed in the hope that it will be useful,
|
---|
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 13 | * GNU General Public License for more details.
|
---|
| 14 | *
|
---|
| 15 | * You should have received a copy of the GNU General Public License
|
---|
| 16 | * along with this program; if not, write to the Free Software
|
---|
| 17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 18 | */
|
---|
| 19 | package org.greenstone.gsdl3.service;
|
---|
| 20 |
|
---|
| 21 | // Greenstone classes
|
---|
| 22 | import org.greenstone.gsdl3.util.OID;
|
---|
| 23 | import org.greenstone.gsdl3.util.GSXML;
|
---|
| 24 | import org.greenstone.gsdl3.util.GSFile;
|
---|
| 25 | import org.greenstone.gsdl3.util.MacroResolver;
|
---|
| 26 | import org.greenstone.gsdl3.util.GS2MacroResolver;
|
---|
[15787] | 27 | import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
|
---|
[9874] | 28 | import org.greenstone.gsdl3.util.DBInfo;
|
---|
[8959] | 29 | // XML classes
|
---|
| 30 | import org.w3c.dom.Document;
|
---|
| 31 | import org.w3c.dom.Element;
|
---|
| 32 | import org.w3c.dom.NodeList;
|
---|
| 33 |
|
---|
| 34 | // General Java classes
|
---|
| 35 | import java.util.ArrayList;
|
---|
| 36 | import java.util.StringTokenizer;
|
---|
| 37 | import java.util.Set;
|
---|
| 38 | import java.util.Iterator;
|
---|
| 39 |
|
---|
[13124] | 40 | import org.apache.log4j.*;
|
---|
| 41 |
|
---|
[8959] | 42 | /** Greenstone 2 collection classifier service
|
---|
| 43 | *
|
---|
| 44 | * @author <a href="mailto:[email protected]">Katherine Don</a>
|
---|
| 45 | * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
|
---|
| 46 | */
|
---|
| 47 |
|
---|
| 48 | public class GS2Browse
|
---|
| 49 | extends AbstractBrowse
|
---|
| 50 | {
|
---|
[13124] | 51 |
|
---|
[13270] | 52 | static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2Browse.class.getName());
|
---|
[13124] | 53 |
|
---|
[15787] | 54 | protected SimpleCollectionDatabase coll_db = null;
|
---|
[8959] | 55 |
|
---|
| 56 | public GS2Browse()
|
---|
| 57 | {
|
---|
| 58 | }
|
---|
| 59 |
|
---|
[9874] | 60 | public void cleanUp() {
|
---|
| 61 | super.cleanUp();
|
---|
[15787] | 62 | this.coll_db.closeDatabase();
|
---|
[9874] | 63 | }
|
---|
| 64 |
|
---|
[8959] | 65 | public boolean configure(Element info, Element extra_info)
|
---|
| 66 | {
|
---|
[10093] | 67 | if (!super.configure(info, extra_info)){
|
---|
| 68 | return false;
|
---|
| 69 | }
|
---|
| 70 |
|
---|
[13124] | 71 | logger.info("Configuring GS2Browse...");
|
---|
[10651] | 72 | // the index stem is either specified in the config file or is the collection name
|
---|
| 73 | Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM);
|
---|
| 74 | String index_stem = null;
|
---|
| 75 | if (index_stem_elem != null) {
|
---|
| 76 | index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
|
---|
| 77 | }
|
---|
| 78 | if (index_stem == null || index_stem.equals("")) {
|
---|
| 79 | index_stem = this.cluster_name;
|
---|
| 80 | }
|
---|
| 81 |
|
---|
[15787] | 82 | // find out what kind of database we have
|
---|
| 83 | Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
|
---|
| 84 | String database_type = null;
|
---|
| 85 | if (database_type_elem != null) {
|
---|
| 86 | database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
|
---|
| 87 | }
|
---|
| 88 | if (database_type == null || database_type.equals("")) {
|
---|
| 89 | database_type = "gdbm"; // the default
|
---|
| 90 | }
|
---|
| 91 | coll_db = new SimpleCollectionDatabase(database_type);
|
---|
| 92 | if (coll_db == null) {
|
---|
| 93 | logger.error("Couldn't create the collection database of type "+database_type);
|
---|
| 94 | return false;
|
---|
| 95 | }
|
---|
| 96 |
|
---|
| 97 | // Open database for querying
|
---|
| 98 | String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, database_type);
|
---|
| 99 | if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
|
---|
| 100 | logger.error("Could not open collection database!");
|
---|
[8959] | 101 | return false;
|
---|
| 102 | }
|
---|
[15787] | 103 | this.macro_resolver = new GS2MacroResolver(this.coll_db);
|
---|
[10093] | 104 | return true;
|
---|
[8959] | 105 | }
|
---|
| 106 |
|
---|
| 107 | /** if id ends in .fc, .pc etc, then translate it to the correct id */
|
---|
| 108 | protected String translateId(String node_id) {
|
---|
[15787] | 109 | return this.coll_db.translateOID(node_id);
|
---|
[8959] | 110 | }
|
---|
| 111 |
|
---|
| 112 | /** returns the document type of the doc that the specified node
|
---|
| 113 | belongs to. should be one of
|
---|
| 114 | GSXML.DOC_TYPE_SIMPLE,
|
---|
| 115 | GSXML.DOC_TYPE_PAGED,
|
---|
| 116 | GSXML.DOC_TYPE_HIERARCHY
|
---|
| 117 | */
|
---|
| 118 | protected String getDocType(String node_id) {
|
---|
[15787] | 119 | DBInfo info = this.coll_db.getInfo(node_id);
|
---|
[8959] | 120 | if (info == null) {
|
---|
| 121 | return GSXML.DOC_TYPE_SIMPLE;
|
---|
| 122 | }
|
---|
| 123 | String doc_type = info.getInfo("doctype");
|
---|
| 124 | if (!doc_type.equals("")&&!doc_type.equals("doc")) {
|
---|
| 125 | return doc_type;
|
---|
| 126 | }
|
---|
| 127 |
|
---|
| 128 | String top_id = OID.getTop(node_id);
|
---|
| 129 | boolean is_top = (top_id.equals(node_id) ? true : false);
|
---|
| 130 |
|
---|
| 131 | String children = info.getInfo("contains");
|
---|
| 132 | boolean is_leaf = (children.equals("") ? true : false);
|
---|
| 133 |
|
---|
| 134 | if (is_top && is_leaf) { // a single section document
|
---|
| 135 | return GSXML.DOC_TYPE_SIMPLE;
|
---|
| 136 | }
|
---|
| 137 |
|
---|
| 138 | // now we just check the top node
|
---|
| 139 | if (!is_top) { // we need to look at the top info
|
---|
[15787] | 140 | info = this.coll_db.getInfo(top_id);
|
---|
[8959] | 141 | }
|
---|
| 142 | if (info == null) {
|
---|
| 143 | return GSXML.DOC_TYPE_HIERARCHY;
|
---|
| 144 | }
|
---|
| 145 |
|
---|
| 146 | String childtype = info.getInfo("childtype");
|
---|
| 147 | if (childtype.equals("Paged")) {
|
---|
| 148 | return GSXML.DOC_TYPE_PAGED;
|
---|
| 149 | }
|
---|
| 150 | return GSXML.DOC_TYPE_HIERARCHY;
|
---|
| 151 |
|
---|
| 152 | }
|
---|
| 153 |
|
---|
| 154 | /** returns the id of the root node of the document containing node node_id. . may be the same as node_id */
|
---|
| 155 | protected String getRootId(String node_id) {
|
---|
| 156 | return OID.getTop(node_id);
|
---|
| 157 | }
|
---|
| 158 | /** returns a list of the child ids in order, null if no children */
|
---|
| 159 | protected ArrayList getChildrenIds(String node_id) {
|
---|
[15787] | 160 | DBInfo info = this.coll_db.getInfo(node_id);
|
---|
[8959] | 161 | if (info == null) {
|
---|
| 162 | return null;
|
---|
| 163 | }
|
---|
| 164 |
|
---|
| 165 | ArrayList children = new ArrayList();
|
---|
| 166 |
|
---|
| 167 | String contains = info.getInfo("contains");
|
---|
| 168 | StringTokenizer st = new StringTokenizer(contains, ";");
|
---|
| 169 | while (st.hasMoreTokens()) {
|
---|
| 170 | String child_id = st.nextToken().replaceAll("\"", node_id);
|
---|
| 171 | children.add(child_id);
|
---|
| 172 | }
|
---|
| 173 | return children;
|
---|
| 174 |
|
---|
| 175 | }
|
---|
| 176 | /** returns the node id of the parent node, null if no parent */
|
---|
| 177 | protected String getParentId(String node_id){
|
---|
| 178 | String parent = OID.getParent(node_id);
|
---|
| 179 | if (parent.equals(node_id)) {
|
---|
| 180 | return null;
|
---|
| 181 | }
|
---|
| 182 | return parent;
|
---|
| 183 | }
|
---|
| 184 |
|
---|
[13962] | 185 | protected String getMetadata(String node_id, String key){
|
---|
[15787] | 186 | DBInfo info = this.coll_db.getInfo(node_id);
|
---|
[13962] | 187 | if (info == null) {
|
---|
| 188 | return "";
|
---|
| 189 | }
|
---|
| 190 |
|
---|
| 191 | Set keys = info.getKeys();
|
---|
| 192 | Iterator it = keys.iterator();
|
---|
| 193 | while(it.hasNext()) {
|
---|
| 194 | String key_in = (String)it.next();
|
---|
| 195 | String value = info.getInfo(key);
|
---|
| 196 | if (key_in.equals(key)){
|
---|
| 197 | return value;
|
---|
| 198 | }
|
---|
| 199 | }
|
---|
| 200 |
|
---|
| 201 | return "";
|
---|
| 202 |
|
---|
| 203 | }
|
---|
| 204 |
|
---|
[8959] | 205 | /** get the metadata for the classifier node node_id
|
---|
| 206 | * returns a metadataList element:
|
---|
| 207 | * <metadataList><metadata name="xxx">value</metadata></metadataList>
|
---|
| 208 | * if all_metadata is true, returns all available metadata, otherwise just
|
---|
| 209 | * returns requested metadata
|
---|
| 210 | */
|
---|
| 211 | // assumes only one value per metadata
|
---|
| 212 | protected Element getMetadataList(String node_id, boolean all_metadata,
|
---|
| 213 | ArrayList metadata_names) {
|
---|
| 214 | String lang = "en";
|
---|
| 215 | Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
|
---|
[15787] | 216 | DBInfo info = this.coll_db.getInfo(node_id);
|
---|
[8959] | 217 | if (info == null) {
|
---|
| 218 | return null;
|
---|
| 219 | }
|
---|
| 220 | if (all_metadata) {
|
---|
| 221 | // return everything out of the database
|
---|
| 222 | Set keys = info.getKeys();
|
---|
| 223 | Iterator it = keys.iterator();
|
---|
| 224 | while(it.hasNext()) {
|
---|
| 225 | String key = (String)it.next();
|
---|
| 226 | String value = info.getInfo(key);
|
---|
| 227 | GSXML.addMetadata(this.doc, metadata_list, key, this.macro_resolver.resolve(value, lang, GS2MacroResolver.SCOPE_META, node_id));
|
---|
| 228 | }
|
---|
| 229 |
|
---|
| 230 | } else {
|
---|
| 231 | for (int i=0; i<metadata_names.size(); i++) {
|
---|
| 232 | String meta_name = (String) metadata_names.get(i);
|
---|
| 233 | String value = (String)info.getInfo(meta_name);
|
---|
| 234 | GSXML.addMetadata(this.doc, metadata_list, meta_name, value);
|
---|
| 235 | }
|
---|
| 236 | }
|
---|
| 237 | return metadata_list;
|
---|
| 238 | }
|
---|
| 239 |
|
---|
| 240 | /** returns the structural information asked for.
|
---|
| 241 | * info_type may be one of
|
---|
| 242 | * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
|
---|
| 243 | */
|
---|
| 244 | protected String getStructureInfo(String doc_id, String info_type) {
|
---|
| 245 | String value="";
|
---|
| 246 | if (info_type.equals(INFO_NUM_SIBS)) {
|
---|
| 247 | String parent_id = OID.getParent(doc_id);
|
---|
| 248 | if (parent_id.equals(doc_id)) {
|
---|
| 249 | value="0";
|
---|
| 250 | } else {
|
---|
| 251 | value = String.valueOf(getNumChildren(parent_id));
|
---|
| 252 | }
|
---|
| 253 | return value;
|
---|
| 254 | }
|
---|
| 255 |
|
---|
| 256 | if (info_type.equals(INFO_NUM_CHILDREN)) {
|
---|
| 257 | return String.valueOf(getNumChildren(doc_id));
|
---|
| 258 | }
|
---|
| 259 |
|
---|
| 260 |
|
---|
| 261 | if (info_type.equals(INFO_SIB_POS)) {
|
---|
| 262 | String parent_id = OID.getParent(doc_id);
|
---|
| 263 | if (parent_id.equals(doc_id)) {
|
---|
| 264 | return "-1";
|
---|
| 265 | }
|
---|
| 266 |
|
---|
[15787] | 267 | DBInfo info = this.coll_db.getInfo(parent_id);
|
---|
[8959] | 268 | if (info==null) {
|
---|
| 269 | return "-1";
|
---|
| 270 | }
|
---|
| 271 |
|
---|
| 272 | String contains = info.getInfo("contains");
|
---|
| 273 | contains = contains.replaceAll("\"", parent_id);
|
---|
| 274 | String [] children = contains.split(";");
|
---|
| 275 | for (int i=0;i<children.length;i++) {
|
---|
| 276 | String child_id = children[i];
|
---|
| 277 | if (child_id.equals(doc_id)) {
|
---|
| 278 | return String.valueOf(i+1); // make it from 1 to length
|
---|
| 279 |
|
---|
| 280 | }
|
---|
| 281 | }
|
---|
| 282 |
|
---|
| 283 | return "-1";
|
---|
| 284 | } else {
|
---|
| 285 | return null;
|
---|
| 286 | }
|
---|
| 287 |
|
---|
| 288 | }
|
---|
| 289 |
|
---|
| 290 | protected int getNumChildren(String node_id) {
|
---|
[15787] | 291 | DBInfo info = this.coll_db.getInfo(node_id);
|
---|
[8959] | 292 | if (info == null) {
|
---|
| 293 | return 0;
|
---|
| 294 | }
|
---|
| 295 | String contains = info.getInfo("contains");
|
---|
| 296 | if (contains.equals("")) {
|
---|
| 297 | return 0;
|
---|
| 298 | }
|
---|
| 299 | String [] children = contains.split(";");
|
---|
| 300 | return children.length;
|
---|
| 301 | }
|
---|
| 302 |
|
---|
| 303 | /** returns true if the id refers to a document (rather than
|
---|
| 304 | * a classifier node)
|
---|
| 305 | */
|
---|
| 306 | protected boolean isDocumentId(String node_id){
|
---|
| 307 | if (node_id.startsWith("CL")) {
|
---|
| 308 | return false;
|
---|
| 309 | }
|
---|
| 310 | return true;
|
---|
| 311 | }
|
---|
| 312 |
|
---|
| 313 | }
|
---|