Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneRetrieve.java@ 32212

Last change on this file since 32212 was 32160, checked in by Georgiy Litvinov, 6 years ago
Cache last document to speed up multisection documents load.
Property svn:keywords set to `Author Date Id Revision`
File size: 5.7 KB

Line
1	/*
2	* GS2LuceneRetrieve.java
3	* Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.GSException;
23	import org.greenstone.gsdl3.util.GSFile;
24	import org.greenstone.gsdl3.util.GSXML;
25	import org.greenstone.gsdl3.util.DBInfo;
26	import org.greenstone.gsdl3.util.GSHTML;
27	import org.greenstone.gsdl3.util.OID;
28	// XML classes
29	import org.w3c.dom.Document;
30	import org.w3c.dom.Element;
31	import org.w3c.dom.Text;
32
33	// General Java classes
34	import java.io.File;
35
36	import org.apache.log4j.Logger;
37
38	/**
39	* Retrieve documents from a gs2 lucene collection. Note that this doesn't
40	* actually use lucene, as the documents are stored in XML files
41	*/
42	public class GS2LuceneRetrieve extends AbstractGS2DocumentRetrieve
43	{
44
45	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName());
46
47	protected static final String DOC_LEVEL = "Doc";
48	protected static final String SEC_LEVEL = "Sec";
49	protected static final String ID_ATT = "gs2:docOID";
50
51	// Parameters used
52	private static final String LEVEL_PARAM = "level";
53
54	// Elements used in the config file that are specific to this class
55	private static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
56
57	private String default_level = null;
58	private String text_dir = null;
59
60	private boolean text_available = true;
61	protected Element lastXMLDocumentElement = null;
62	protected File lastXMLDocumentFile = null;
63
64
65	public GS2LuceneRetrieve()
66	{
67	}
68
69	public void cleanUp()
70	{
71	super.cleanUp();
72	}
73
74	/** configure this service */
75	public boolean configure(Element info, Element extra_info)
76	{
77	if (!super.configure(info, extra_info))
78	{
79	return false;
80	}
81
82	// Do specific configuration
83	logger.info("Configuring GS2LuceneRetrieve...");
84
85	text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar + "text" + File.separatorChar;
86	if (!(new File(text_dir).isDirectory()))
87	{
88	logger.error("Text directory " + text_dir + " does not exist, will be unable to retrieve text for " + cluster_name);
89	text_available = false;
90	return true; // return true so that we still get the other services for the collection
91	}
92	// Get the default level out of <defaultLevel> (buildConfig.xml)
93	Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
94	if (def != null)
95	{
96	this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT);
97	}
98	if (this.default_level == null \|\| this.default_level.equals(""))
99	{
100	logger.error("Default level not specified for " + this.cluster_name + ", assuming " + DOC_LEVEL);
101	this.default_level = DOC_LEVEL;
102	}
103
104	return true;
105
106	}
107
108	/**
109	* returns the content of a node should return a nodeContent element:
110	* <nodeContent>text content or other elements</nodeContent>
111	*/
112	protected Element getNodeContent(Document doc, String doc_id, String lang) throws GSException
113	{
114	String[] args = new String[1];
115	args[0] = doc_id;
116	String doc_content = getTextString("TextRetrievalError", lang, args);
117	try
118	{
119	if (!text_available)
120	{
121	throw new Exception("No text directory available");
122	}
123
124	DBInfo info = this.coll_db.getInfo(OID.getTop(doc_id));
125	if (info == null)
126	{
127	throw new Exception("Couldn't get database entry for " + OID.getTop(doc_id));
128	}
129
130	String archivedir = info.getInfo("archivedir");
131	File doc_xml_file = new File(text_dir + archivedir + File.separatorChar + "doc.xml");
132	if (!doc_xml_file.isFile())
133	{
134	throw new Exception("Doc XML file " + doc_xml_file.getPath() + " does not exist");
135	}
136	Element full_document = null;
137	if (lastXMLDocumentFile != null && lastXMLDocumentFile.equals(doc_xml_file)) {
138	full_document = lastXMLDocumentElement;
139	} else {
140	Document doc_xml_doc = this.converter.getDOM(doc_xml_file, "utf-8");
141	if (doc_xml_doc == null)
142	{
143	throw new Exception("Couldn't parse file " + doc_xml_file.getPath());
144	}
145	full_document = doc_xml_doc.getDocumentElement();
146	if (full_document == null)
147	{
148	throw new Exception("Couldn't parse file " + doc_xml_file.getPath());
149	}
150	}
151	lastXMLDocumentElement = full_document;
152	lastXMLDocumentFile = doc_xml_file;
153
154	Element current_section = null;
155	if (default_level.equals(DOC_LEVEL))
156	{
157	current_section = full_document;
158	}
159	else
160	{
161	current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, doc_id);
162	}
163	if (current_section == null)
164	{
165	throw new Exception("Couldn't find section " + doc_id + " in file " + doc_xml_file.getPath());
166	}
167	doc_content = GSXML.getNodeText(current_section);
168	if (doc_content == null)
169	{
170	doc_content = "";
171	}
172	else
173	{
174	doc_content = resolveTextMacros(doc_content, doc_id, lang);
175	}
176	}
177	catch (Exception e)
178	{
179	logger.error("Error trying to get document text for " + doc_id + " in collection " + this.cluster_name + ": " + e);
180	}
181
182	Element content_node = doc.createElement(GSXML.NODE_CONTENT_ELEM);
183	Text t = doc.createTextNode(doc_content);
184	content_node.appendChild(t);
185	return content_node;
186	}
187	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: