Context Navigation

source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java@ 18422

Last change on this file since 18422 was 18422, checked in by kjdon, 15 years ago
lucene uses greenstone ids as its internal ids now, so need to make dummy methods that don't do anything for internalNum2OID
Property svn:keywords set to `Author Date Id Revision`
File size: 9.6 KB

Line
1	/*
2	* GS2LuceneSearch.java
3	* Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify it under the terms of the
6	* GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7	* (at your option) any later version.
8	*
9	* This program is distributed in the hope that it will be useful,
10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	* GNU General Public License for more details.
13	*
14	* You should have received a copy of the GNU General Public License
15	* along with this program; if not, write to the Free Software
16	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17	*/
18
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.util.*;
23
24	// XML classes
25	import org.w3c.dom.Element;
26	import org.w3c.dom.NodeList;
27	import org.w3c.dom.Document;
28	// java classes
29	import java.util.ArrayList;
30	import java.util.HashMap;
31	import java.io.File;
32	import java.util.Iterator;
33	import java.util.Set;
34	import java.util.Map;
35	import java.util.Vector;
36
37	// Logging
38	import org.apache.log4j.Logger;
39
40	import org.greenstone.LuceneWrapper.GS2LuceneQuery;
41	import org.greenstone.LuceneWrapper.LuceneQueryResult;
42
43	public class GS2LuceneSearch
44	extends AbstractGS2FieldSearch
45	{
46	protected static final String RANK_PARAM_RANK_VALUE = "rank";
47
48	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
49
50	private GS2LuceneQuery lucene_src=null;
51
52	public GS2LuceneSearch()
53	{
54	this.lucene_src = new GS2LuceneQuery();
55	// Lucene uses double operators, not single
56	AND_OPERATOR = "&&";
57	OR_OPERATOR = "\|\|";
58
59	does_paging = true;
60	does_chunking = true;
61	}
62
63	public void cleanUp() {
64	super.cleanUp();
65	this.lucene_src.cleanUp();
66	}
67
68	/** configure this service */
69	public boolean configure(Element info, Element extra_info)
70	{
71	if (!super.configure(info, extra_info)){
72	return false;
73	}
74
75	// Lucene doesn't do case folding or stemming or accent folding at the
76	// moment
77	does_case = false;
78	does_stem = false;
79	does_accent = false;
80
81	return true;
82	}
83
84	/** add in the lucene specific params to TextQuery */
85	protected void addCustomQueryParams(Element param_list, String lang)
86	{
87	super.addCustomQueryParams(param_list, lang);
88	/** lucenes rank param is based on index fields, not ranked/not */
89	createParameter(RANK_PARAM, param_list, lang);
90
91	}
92
93	/** create a param and add to the list */
94	/** we override this to do a special rank param */
95	protected void createParameter(String name, Element param_list, String lang)
96	{
97	Element param = null;
98	if (name.equals(RANK_PARAM)) {
99	// get the fields
100	ArrayList fields = new ArrayList();
101	fields.add(RANK_PARAM_RANK_VALUE);
102	ArrayList field_names = new ArrayList();
103	field_names.add(getTextString("param.sortBy.rank", lang));
104	getSortByIndexData(fields, field_names, lang);
105
106	param = GSXML.createParameterDescription2(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)fields.get(0), fields, field_names );
107	}
108	if (param != null) {
109	param_list.appendChild(param);
110	} else {
111	super.createParameter(name, param_list, lang);
112	}
113	}
114
115	protected void getSortByIndexData(ArrayList index_ids, ArrayList index_names, String lang) {
116	// the index info -
117	Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
118	NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
119	int len = indexes.getLength();
120	// now add even if there is only one
121	for (int i=0; i<len; i++) {
122	Element index = (Element)indexes.item(i);
123	String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
124	if (shortname.equals("") \|\| shortname.equals("ZZ") \|\| shortname.equals("TX")) {
125	continue;
126	}
127	index_ids.add("by"+shortname);
128	String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
129	if (display_name.equals("")) {
130	display_name = index.getAttribute(GSXML.NAME_ATT);
131	if (display_name.equals("")) {
132	display_name = shortname;
133	}
134	}
135	index_names.add(display_name);
136
137	}
138
139	}
140
141	/** methods to handle actually doing the query */
142
143	/** do any initialisation of the query object */
144	protected boolean setUpQueryer(HashMap params) {
145	String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index"+File.separatorChar;
146
147	String index = "didx";
148	String physical_index_language_name=null;
149	String physical_sub_index_name=null;
150	int maxdocs = 100;
151	int hits_per_page = 20;
152	int start_page = 1;
153	// set up the query params
154	Set entries = params.entrySet();
155	Iterator i = entries.iterator();
156	while (i.hasNext()) {
157	Map.Entry m = (Map.Entry)i.next();
158	String name = (String)m.getKey();
159	String value = (String)m.getValue();
160
161	if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
162	maxdocs = Integer.parseInt(value);
163	} else if (name.equals(HITS_PER_PAGE_PARAM)) {
164	hits_per_page = Integer.parseInt(value);
165	} else if (name.equals(START_PAGE_PARAM)) {
166	start_page = Integer.parseInt(value);
167
168	} else if (name.equals(MATCH_PARAM)) {
169	if (value.equals(MATCH_PARAM_ALL)) {
170	this.lucene_src.setDefaultConjunctionOperator("AND");
171	} else{
172	this.lucene_src.setDefaultConjunctionOperator("OR");
173	}
174	} else if (name.equals(RANK_PARAM)) {
175	if (value.equals(RANK_PARAM_RANK_VALUE)) {
176	value = null;
177	}
178	this.lucene_src.setSortField(value);
179	} else if (name.equals(LEVEL_PARAM)) {
180	if (value.toUpperCase().equals("SEC")){
181	index = "sidx";
182	}
183	else {
184	index = "didx";
185	}
186	} else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) {
187	physical_sub_index_name=value;
188	} else if (name.equals(INDEX_LANGUAGE_PARAM)){
189	physical_index_language_name=value;
190	} // ignore any others
191	}
192	// set up start and end results if necessary
193	int start_results = 1;
194	if (start_page != 1) {
195	start_results = ((start_page-1) * hits_per_page) + 1;
196	}
197	int end_results = hits_per_page * start_page;
198	this.lucene_src.setStartResults(start_results);
199	this.lucene_src.setEndResults(end_results);
200
201
202	if (index.equals("sidx") \|\| index.equals("didx")){
203	if (physical_sub_index_name!=null) {
204	index+=physical_sub_index_name;
205	}
206	if (physical_index_language_name!=null){
207	index+=physical_index_language_name;
208	}
209	}
210
211	this.lucene_src.setIndexDir(indexdir+index);
212	this.lucene_src.initialise();
213	return true;
214	}
215	/** do the query */
216	protected Object runQuery(String query) {
217	try {
218	LuceneQueryResult lqr=this.lucene_src.runQuery(query);
219	return lqr;
220	} catch (Exception e) {
221	logger.error ("exception happened in run query: ", e);
222	}
223
224	return null;
225	}
226	/** get the total number of docs that match */
227	protected long numDocsMatched(Object query_result) {
228	return ((LuceneQueryResult)query_result).getTotalDocs();
229
230	}
231	/** get the list of doc ids */
232	protected String [] getDocIDs(Object query_result) {
233	Vector docs = ((LuceneQueryResult)query_result).getDocs();
234	String [] doc_nums = new String [docs.size()];
235	for (int d = 0; d < docs.size(); d++) {
236	String doc_num = ((LuceneQueryResult.DocInfo) docs.elementAt(d)).id_;
237	doc_nums[d] = doc_num;
238	}
239	return doc_nums;
240	}
241	/** get the list of doc ranks */
242	protected String [] getDocRanks(Object query_result) {
243	Vector docs = ((LuceneQueryResult)query_result).getDocs();
244	String [] doc_ranks = new String [docs.size()];
245	for (int d = 0; d < docs.size(); d++) {
246	doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_);
247	}
248	return doc_ranks;
249	}
250	/** add in term info if available */
251	protected boolean addTermInfo(Element term_list, HashMap params,
252	Object query_result) {
253	String query_level = (String)params.get(LEVEL_PARAM); // the current query level
254
255	Vector terms = ((LuceneQueryResult)query_result).getTerms();
256	for (int t = 0; t < terms.size(); t++) {
257	LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t);
258
259	Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
260	term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
261	term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
262	term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
263	term_elem.setAttribute(FIELD_ATT, term_info.field_);
264	term_list.appendChild(term_elem);
265	}
266	return true;
267	}
268
269	protected String addFieldInfo(String query, String field) {
270	if (field.equals("") \|\| field.equals("ZZ")) {
271	return query;
272	}
273	return field+":("+query+")";
274	}
275
276	protected void addQueryElem(StringBuffer s, String q, String f, String c) {
277
278	String combine="";
279	if (s.length()>0) {
280	combine = " "+c+" ";
281	}
282	s.append(combine + addFieldInfo(q,f));
283	}
284
285	/** Lucene doesn't use these options at the moment */
286	protected String addStemOptions(String query, String stem,
287	String casef, String accent) {
288	return query;
289	}
290
291	/** Lucene no longer uses internal ids. It just uses hash ids. So we need
292	to override these methods so no conversion is done. */
293	/** convert indexer internal id to Greenstone oid */
294	protected String internalNum2OID(long docnum)
295	{
296	return Long.toString(docnum);
297
298	}
299	protected String internalNum2OID(String docnum)
300	{
301	return docnum;
302
303	}
304
305	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: