Context Navigation

source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/GS2LuceneRetrieve.java@ 16780

Last change on this file since 16780 was 15326, checked in by kjdon, 16 years ago
Added support for JDBM (or another database) in place of GDBM: use SimpleCollectionDatabase instead of GDBMWrapper. New element in the buildConfig file: databaseType, set to gdbm or jdbm; if it is not present, gdbm is assumed. There may also be some small style changes to some files.
Property svn:keywords set to `Author Date Id Revision`
File size: 5.5 KB

Rev	Line
[13240]	1	/*
	2	* GS2LuceneRetrieve.java
	3	* Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	* GNU General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	18	*/
	19	package org.greenstone.gsdl3.service;
	20
	21	// Greenstone classes
	22	import org.greenstone.gsdl3.core.GSException;
	23	import org.greenstone.gsdl3.util.GSFile;
	24	import org.greenstone.gsdl3.util.GSXML;
[13576]	25	import org.greenstone.gsdl3.util.DBInfo;
	26	import org.greenstone.gsdl3.util.GSHTML;
	27	import org.greenstone.gsdl3.util.OID;
[13240]	28	// XML classes
[13576]	29	import org.w3c.dom.Document;
[13240]	30	import org.w3c.dom.Element;
	31	import org.w3c.dom.Text;
	32
	33	// General Java classes
	34	import java.io.File;
	35
[13270]	36	import org.apache.log4j.Logger;
[13240]	37
[13576]	38	/** Retrieve documents from a gs2 lucene collection. Note that this doesn't
	39	actually use lucene, as the documents are stored in XML files */
[13240]	40	public class GS2LuceneRetrieve
	41	extends AbstractGS2DocumentRetrieve
	42	{
[13576]	43
	44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName());
	45
	46
	47	protected static final String DOC_LEVEL="Doc";
	48	protected static final String SEC_LEVEL="Sec";
	49	protected static final String ID_ATT = "gs2:id";
[13240]	50
[13576]	51	// Parameters used
[13240]	52	private static final String LEVEL_PARAM = "level";
[13576]	53
[13240]	54	// Elements used in the config file that are specific to this class
	55	private static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
[13576]	56
	57	private String default_level = null;
	58	private String text_dir = null;
[13240]	59
[13576]	60	private boolean text_available = true;
[13240]	61
	62	public GS2LuceneRetrieve() {
	63	}
[13576]	64
[13240]	65	public void cleanUp() {
	66	super.cleanUp();
	67	}
[13576]	68
[13240]	69	/** configure this service */
	70	public boolean configure(Element info, Element extra_info)
	71	{
	72	if (!super.configure(info, extra_info)){
	73	return false;
	74	}
[13576]	75
[13240]	76	// Do specific configuration
	77	logger.info("Configuring GS2LuceneRetrieve...");
[13576]	78
	79	text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar+"text"+File.separatorChar;
	80	if (!(new File(text_dir).isDirectory())) {
	81	logger.error("Text directory "+text_dir+" does not exist, will be unable to retrieve text for "+cluster_name);
	82	text_available = false;
	83	return true; // return true so that we still get the other services for the collection
	84	}
	85	// Get the default level out of <defaultLevel> (buildConfig.xml)
	86	Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
	87	if (def != null) {
[13916]	88	this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT);
[13576]	89	}
	90	if (this.default_level == null \|\| this.default_level.equals("")) {
	91	logger.error("Default level not specified for "+this.cluster_name+", assuming "+DOC_LEVEL);
	92	this.default_level = DOC_LEVEL;
	93	}
	94
[13240]	95	return true;
[13576]	96
[13240]	97	}
	98
	99	/** returns the content of a node
	100	* should return a nodeContent element:
	101	* <nodeContent>text content or other elements</nodeContent>
	102	*/
[13576]	103	protected Element getNodeContent(String doc_id, String lang) throws GSException {
	104	String doc_content = getTextString("TextRetrievalError", lang);
	105	try {
	106	if (!text_available) {
	107	throw new Exception("No text directory available");
	108	}
	109
[15326]	110	String doc_num = this.coll_db.OID2Docnum(doc_id);
	111	if (doc_num == null \|\| doc_num.equals("")) {
[13576]	112	throw new Exception("OID "+doc_id +" couldn't be converted to lucene doc num");
	113	}
[13240]	114
[15326]	115	DBInfo info=this.coll_db.getInfo(OID.getTop(doc_id));
[13576]	116	if (info == null) {
[15326]	117	throw new Exception("Couldn't get database entry for "+OID.getTop(doc_id));
[13576]	118	}
	119
	120	String archivedir=info.getInfo("archivedir");
	121	File doc_xml_file = new File(text_dir+archivedir+File.separatorChar+"doc.xml");
	122	if (!doc_xml_file.isFile()) {
	123	throw new Exception("Doc XML file "+doc_xml_file.getPath()+" does not exist");
	124	}
	125	Document doc_xml_doc = this.converter.getDOM(doc_xml_file);
	126	if (doc_xml_doc == null) {
	127	throw new Exception("Couldn't parse file "+doc_xml_file.getPath());
	128	}
	129	Element full_document = doc_xml_doc.getDocumentElement();
	130	if (full_document == null) {
	131	throw new Exception("Couldn't parse file "+doc_xml_file.getPath());
	132	}
	133	Element current_section = null;
	134	if (default_level.equals(DOC_LEVEL)) {
	135	current_section = full_document;
	136	} else {
[15326]	137	current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, doc_num);
[13576]	138	}
	139	if (current_section == null) {
	140	throw new Exception("Couldn't find section "+ doc_num+" in file "+doc_xml_file.getPath());
	141	}
	142	doc_content = GSXML.getNodeText(current_section);
	143	if (doc_content == null) {
	144	doc_content = "";
	145	} else {
	146	doc_content = resolveTextMacros(doc_content, doc_id, lang);
	147	}
[13240]	148	} catch (Exception e) {
[13576]	149	logger.error("Error trying to get document text for "+doc_id+" in collection "+this.cluster_name+": "+e);
[13240]	150	}
[13576]	151
	152	Element content_node = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
[13240]	153	Text t = this.doc.createTextNode(doc_content);
	154	content_node.appendChild(t);
	155	return content_node;
	156	}
	157	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: