Context Navigation

source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/GS2LuceneRetrieve.java@ 20817

Last change on this file since 20817 was 20817, checked in by kjdon, 15 years ago
We no longer use sequential IDs for sections, so we can use doc_id to get the section.
Property svn:keywords set to `Author Date Id Revision`
File size: 5.4 KB

Line
1	/*
2	* GS2LuceneRetrieve.java
3	* Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.GSException;
23	import org.greenstone.gsdl3.util.GSFile;
24	import org.greenstone.gsdl3.util.GSXML;
25	import org.greenstone.gsdl3.util.DBInfo;
26	import org.greenstone.gsdl3.util.GSHTML;
27	import org.greenstone.gsdl3.util.OID;
28	// XML classes
29	import org.w3c.dom.Document;
30	import org.w3c.dom.Element;
31	import org.w3c.dom.Text;
32
33	// General Java classes
34	import java.io.File;
35
36	import org.apache.log4j.Logger;
37
38	/** Retrieve documents from a gs2 lucene collection. Note that this doesn't
39	actually use lucene, as the documents are stored in XML files */
40	public class GS2LuceneRetrieve
41	extends AbstractGS2DocumentRetrieve
42	{
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName());
45
46
47	protected static final String DOC_LEVEL="Doc";
48	protected static final String SEC_LEVEL="Sec";
49	protected static final String ID_ATT = "gs2:docOID";
50
51	// Parameters used
52	private static final String LEVEL_PARAM = "level";
53
54	// Elements used in the config file that are specific to this class
55	private static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
56
57	private String default_level = null;
58	private String text_dir = null;
59
60	private boolean text_available = true;
61
62	public GS2LuceneRetrieve() {
63	}
64
65	public void cleanUp() {
66	super.cleanUp();
67	}
68
69	/** configure this service */
70	public boolean configure(Element info, Element extra_info)
71	{
72	if (!super.configure(info, extra_info)){
73	return false;
74	}
75
76	// Do specific configuration
77	logger.info("Configuring GS2LuceneRetrieve...");
78
79	text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar+"text"+File.separatorChar;
80	if (!(new File(text_dir).isDirectory())) {
81	logger.error("Text directory "+text_dir+" does not exist, will be unable to retrieve text for "+cluster_name);
82	text_available = false;
83	return true; // return true so that we still get the other services for the collection
84	}
85	// Get the default level out of <defaultLevel> (buildConfig.xml)
86	Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
87	if (def != null) {
88	this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT);
89	}
90	if (this.default_level == null \|\| this.default_level.equals("")) {
91	logger.error("Default level not specified for "+this.cluster_name+", assuming "+DOC_LEVEL);
92	this.default_level = DOC_LEVEL;
93	}
94
95	return true;
96
97	}
98
99	/** returns the content of a node
100	* should return a nodeContent element:
101	* <nodeContent>text content or other elements</nodeContent>
102	*/
103	protected Element getNodeContent(String doc_id, String lang) throws GSException {
104	String [] args = new String[1];
105	args[0] = doc_id;
106	String doc_content = getTextString("TextRetrievalError", lang, args);
107	try {
108	if (!text_available) {
109	throw new Exception("No text directory available");
110	}
111
112	DBInfo info=this.coll_db.getInfo(OID.getTop(doc_id));
113	if (info == null) {
114	throw new Exception("Couldn't get database entry for "+OID.getTop(doc_id));
115	}
116
117	String archivedir=info.getInfo("archivedir");
118	File doc_xml_file = new File(text_dir+archivedir+File.separatorChar+"doc.xml");
119	if (!doc_xml_file.isFile()) {
120	throw new Exception("Doc XML file "+doc_xml_file.getPath()+" does not exist");
121	}
122	Document doc_xml_doc = this.converter.getDOM(doc_xml_file, "utf-8");
123	if (doc_xml_doc == null) {
124	throw new Exception("Couldn't parse file "+doc_xml_file.getPath());
125	}
126	Element full_document = doc_xml_doc.getDocumentElement();
127	if (full_document == null) {
128	throw new Exception("Couldn't parse file "+doc_xml_file.getPath());
129	}
130	Element current_section = null;
131	if (default_level.equals(DOC_LEVEL)) {
132	current_section = full_document;
133	} else {
134	current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, doc_id);
135	}
136	if (current_section == null) {
137	throw new Exception("Couldn't find section "+ doc_id+" in file "+doc_xml_file.getPath());
138	}
139	doc_content = GSXML.getNodeText(current_section);
140	if (doc_content == null) {
141	doc_content = "";
142	} else {
143	doc_content = resolveTextMacros(doc_content, doc_id, lang);
144	}
145	} catch (Exception e) {
146	logger.error("Error trying to get document text for "+doc_id+" in collection "+this.cluster_name+": "+e);
147	}
148
149	Element content_node = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
150	Text t = this.doc.createTextNode(doc_content);
151	content_node.appendChild(t);
152	return content_node;
153	}
154	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: