Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneRetrieve.java@ 30056

Last change on this file since 30056 was 28966, checked in by kjdon, 10 years ago
Lots of changes. Mainly to do with removing this.doc from everywhere. Document is not thread safe. Now we tend to create a new Document every time we are starting a new page/message etc. In a service, this.desc_doc is available as the document to create service info stuff. But it should only be used for this and not for other messages. newDOM is now static for XMLConverter. Method param changes for some GSXML methods.
Property svn:keywords set to `Author Date Id Revision`
File size: 5.4 KB

Rev	Line
[13240]	1	/*
	2	* GS2LuceneRetrieve.java
	3	* Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	* GNU General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	18	*/
	19	package org.greenstone.gsdl3.service;
	20
	21	// Greenstone classes
	22	import org.greenstone.gsdl3.core.GSException;
	23	import org.greenstone.gsdl3.util.GSFile;
	24	import org.greenstone.gsdl3.util.GSXML;
[13576]	25	import org.greenstone.gsdl3.util.DBInfo;
	26	import org.greenstone.gsdl3.util.GSHTML;
	27	import org.greenstone.gsdl3.util.OID;
[13240]	28	// XML classes
[13576]	29	import org.w3c.dom.Document;
[13240]	30	import org.w3c.dom.Element;
	31	import org.w3c.dom.Text;
	32
	33	// General Java classes
	34	import java.io.File;
	35
[13270]	36	import org.apache.log4j.Logger;
[13240]	37
[25649]	38	/**
	39	* Retrieve documents from a gs2 lucene collection. Note that this doesn't
	40	* actually use lucene, as the documents are stored in XML files
	41	*/
	42	public class GS2LuceneRetrieve extends AbstractGS2DocumentRetrieve
[13240]	43	{
	44
[25649]	45	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName());
[13240]	46
[25649]	47	protected static final String DOC_LEVEL = "Doc";
	48	protected static final String SEC_LEVEL = "Sec";
	49	protected static final String ID_ATT = "gs2:docOID";
	50
	51	// Parameters used
	52	private static final String LEVEL_PARAM = "level";
	53
	54	// Elements used in the config file that are specific to this class
	55	private static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
	56
	57	private String default_level = null;
	58	private String text_dir = null;
	59
	60	private boolean text_available = true;
	61
	62	public GS2LuceneRetrieve()
	63	{
[13240]	64	}
[25649]	65
	66	public void cleanUp()
	67	{
	68	super.cleanUp();
[13576]	69	}
[25649]	70
	71	/** configure this service */
	72	public boolean configure(Element info, Element extra_info)
	73	{
	74	if (!super.configure(info, extra_info))
	75	{
	76	return false;
	77	}
	78
	79	// Do specific configuration
	80	logger.info("Configuring GS2LuceneRetrieve...");
	81
	82	text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar + "text" + File.separatorChar;
	83	if (!(new File(text_dir).isDirectory()))
	84	{
	85	logger.error("Text directory " + text_dir + " does not exist, will be unable to retrieve text for " + cluster_name);
	86	text_available = false;
	87	return true; // return true so that we still get the other services for the collection
	88	}
	89	// Get the default level out of <defaultLevel> (buildConfig.xml)
	90	Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
	91	if (def != null)
	92	{
	93	this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT);
	94	}
	95	if (this.default_level == null \|\| this.default_level.equals(""))
	96	{
	97	logger.error("Default level not specified for " + this.cluster_name + ", assuming " + DOC_LEVEL);
	98	this.default_level = DOC_LEVEL;
	99	}
	100
	101	return true;
	102
[13576]	103	}
[25649]	104
	105	/**
	106	* returns the content of a node should return a nodeContent element:
	107	* <nodeContent>text content or other elements</nodeContent>
	108	*/
[28966]	109	protected Element getNodeContent(Document doc, String doc_id, String lang) throws GSException
[25649]	110	{
	111	String[] args = new String[1];
	112	args[0] = doc_id;
	113	String doc_content = getTextString("TextRetrievalError", lang, args);
	114	try
	115	{
	116	if (!text_available)
	117	{
	118	throw new Exception("No text directory available");
	119	}
	120
	121	DBInfo info = this.coll_db.getInfo(OID.getTop(doc_id));
	122	if (info == null)
	123	{
	124	throw new Exception("Couldn't get database entry for " + OID.getTop(doc_id));
	125	}
	126
	127	String archivedir = info.getInfo("archivedir");
	128	File doc_xml_file = new File(text_dir + archivedir + File.separatorChar + "doc.xml");
	129	if (!doc_xml_file.isFile())
	130	{
	131	throw new Exception("Doc XML file " + doc_xml_file.getPath() + " does not exist");
	132	}
	133	Document doc_xml_doc = this.converter.getDOM(doc_xml_file, "utf-8");
	134	if (doc_xml_doc == null)
	135	{
	136	throw new Exception("Couldn't parse file " + doc_xml_file.getPath());
	137	}
	138	Element full_document = doc_xml_doc.getDocumentElement();
	139	if (full_document == null)
	140	{
	141	throw new Exception("Couldn't parse file " + doc_xml_file.getPath());
	142	}
	143	Element current_section = null;
	144	if (default_level.equals(DOC_LEVEL))
	145	{
	146	current_section = full_document;
	147	}
	148	else
	149	{
	150	current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, doc_id);
	151	}
	152	if (current_section == null)
	153	{
	154	throw new Exception("Couldn't find section " + doc_id + " in file " + doc_xml_file.getPath());
	155	}
	156	doc_content = GSXML.getNodeText(current_section);
	157	if (doc_content == null)
	158	{
	159	doc_content = "";
	160	}
	161	else
	162	{
	163	doc_content = resolveTextMacros(doc_content, doc_id, lang);
	164	}
	165	}
	166	catch (Exception e)
	167	{
	168	logger.error("Error trying to get document text for " + doc_id + " in collection " + this.cluster_name + ": " + e);
	169	}
	170
[28966]	171	Element content_node = doc.createElement(GSXML.NODE_CONTENT_ELEM);
	172	Text t = doc.createTextNode(doc_content);
[25649]	173	content_node.appendChild(t);
	174	return content_node;
[13240]	175	}
	176	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: