Context Navigation

source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java@ 14530

Last change on this file since 14530 was 14005, checked in by qq6, 17 years ago
change didx to sidx
Property svn:keywords set to `Author Date Id Revision`
File size: 9.2 KB

Line
1	/*
2	* GS2LuceneSearch.java
3	* Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7	* (at your option) any later version.
8	*
9	* This program is distributed in the hope that it will be useful,
10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	* GNU General Public License for more details.
13	*
14	* You should have received a copy of the GNU General Public License
15	* along with this program; if not, write to the Free Software
16	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17	*/
18
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.util.*;
23
24	// XML classes
25	import org.w3c.dom.Element;
26	import org.w3c.dom.NodeList;
27	import org.w3c.dom.Document;
28	// java classes
29	import java.util.ArrayList;
30	import java.util.HashMap;
31	import java.io.File;
32	import java.util.Iterator;
33	import java.util.Set;
34	import java.util.Map;
35	import java.util.Vector;
36
37	// Logging
38	import org.apache.log4j.Logger;
39
40	import org.greenstone.LuceneWrapper.GS2LuceneQuery;
41	import org.greenstone.LuceneWrapper.LuceneQueryResult;
42
43	public class GS2LuceneSearch
44	extends AbstractGS2FieldSearch
45	{
46	protected static final String RANK_PARAM_RANK_VALUE = "rank";
47
48	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
49
50	private GS2LuceneQuery lucene_src=null;
51
52	public GS2LuceneSearch()
53	{
54	this.lucene_src = new GS2LuceneQuery();
55	// Lucene uses double operators, not single
56	AND_OPERATOR = "&&";
57	OR_OPERATOR = "\|\|";
58
59	does_paging = true;
60	does_chunking = true;
61	}
62
63	public void cleanUp() {
64	super.cleanUp();
65	this.lucene_src.cleanUp();
66	}
67
68	/** configure this service */
69	public boolean configure(Element info, Element extra_info)
70	{
71	if (!super.configure(info, extra_info)){
72	return false;
73	}
74
75	// Lucene doesn't do case folding or stemming or accent folding at the
76	// moment
77	does_case = false;
78	does_stem = false;
79	does_accent = false;
80
81	return true;
82	}
83
84	/** add in the lucene specific params to TextQuery */
85	protected void addCustomQueryParams(Element param_list, String lang)
86	{
87	super.addCustomQueryParams(param_list, lang);
88	/** lucenes rank param is based on index fields, not ranked/not */
89	createParameter(RANK_PARAM, param_list, lang);
90
91	}
92
93	/** create a param and add to the list */
94	/** we override this to do a special rank param */
95	protected void createParameter(String name, Element param_list, String lang)
96	{
97	Element param = null;
98	if (name.equals(RANK_PARAM)) {
99	// get the fields
100	ArrayList fields = new ArrayList();
101	fields.add(RANK_PARAM_RANK_VALUE);
102	ArrayList field_names = new ArrayList();
103	field_names.add(getTextString("param.sortBy.rank", lang));
104	getSortByIndexData(fields, field_names, lang);
105
106	param = GSXML.createParameterDescription2(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)fields.get(0), fields, field_names );
107	}
108	if (param != null) {
109	param_list.appendChild(param);
110	} else {
111	super.createParameter(name, param_list, lang);
112	}
113	}
114
115	protected void getSortByIndexData(ArrayList index_ids, ArrayList index_names, String lang) {
116	// the index info -
117	Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
118	NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
119	int len = indexes.getLength();
120	// now add even if there is only one
121	for (int i=0; i<len; i++) {
122	Element index = (Element)indexes.item(i);
123	String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
124	if (shortname.equals("") \|\| shortname.equals("ZZ") \|\| shortname.equals("TX")) {
125	continue;
126	}
127	index_ids.add("by"+shortname);
128	String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
129	if (display_name.equals("")) {
130	display_name = index.getAttribute(GSXML.NAME_ATT);
131	if (display_name.equals("")) {
132	display_name = shortname;
133	}
134	}
135	index_names.add(display_name);
136
137	}
138
139	}
140
141	/** methods to handle actually doing the query */
142
143	/** do any initialisation of the query object */
144	protected boolean setUpQueryer(HashMap params) {
145	String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index"+File.separatorChar;
146
147	String index = "didx";
148	String physical_index_language_name=null;
149	String physical_sub_index_name=null;
150	int maxdocs = 100;
151	int hits_per_page = 20;
152	int start_page = 1;
153	// set up the query params
154	Set entries = params.entrySet();
155	Iterator i = entries.iterator();
156	while (i.hasNext()) {
157	Map.Entry m = (Map.Entry)i.next();
158	String name = (String)m.getKey();
159	String value = (String)m.getValue();
160
161	if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
162	maxdocs = Integer.parseInt(value);
163	} else if (name.equals(HITS_PER_PAGE_PARAM)) {
164	hits_per_page = Integer.parseInt(value);
165	} else if (name.equals(START_PAGE_PARAM)) {
166	start_page = Integer.parseInt(value);
167
168	} else if (name.equals(MATCH_PARAM)) {
169	if (value.equals(MATCH_PARAM_ALL)) {
170	this.lucene_src.setDefaultConjunctionOperator("AND");
171	} else{
172	this.lucene_src.setDefaultConjunctionOperator("OR");
173	}
174	} else if (name.equals(RANK_PARAM)) {
175	if (value.equals(RANK_PARAM_RANK_VALUE)) {
176	value = null;
177	}
178	this.lucene_src.setSortField(value);
179	} else if (name.equals(LEVEL_PARAM)) {
180	if (value.toUpperCase().equals("SEC")){
181	index = "sidx";
182	}
183	else {
184	index = "didx";
185	}
186	} else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) {
187	physical_sub_index_name=value;
188	} else if (name.equals(INDEX_LANGUAGE_PARAM)){
189	physical_index_language_name=value;
190	} // ignore any others
191	}
192	// set up start and end results if necessary
193	int start_results = 1;
194	if (start_page != 1) {
195	start_results = ((start_page-1) * hits_per_page) + 1;
196	}
197	int end_results = hits_per_page * start_page;
198	this.lucene_src.setStartResults(start_results);
199	this.lucene_src.setEndResults(end_results);
200
201
202	if (index.equals("sidx") \|\| index.equals("didx")){
203	if (physical_sub_index_name!=null) {
204	index+=physical_sub_index_name;
205	}
206	if (physical_index_language_name!=null){
207	index+=physical_index_language_name;
208	}
209	}
210
211	this.lucene_src.setIndexDir(indexdir+index);
212	this.lucene_src.initialise();
213	return true;
214	}
215	/** do the query */
216	protected Object runQuery(String query) {
217	try {
218	LuceneQueryResult lqr=this.lucene_src.runQuery(query);
219	return lqr;
220	} catch (Exception e) {
221	logger.error ("exception happened in run query: ", e);
222	}
223
224	return null;
225	}
226	/** get the total number of docs that match */
227	protected long numDocsMatched(Object query_result) {
228	return ((LuceneQueryResult)query_result).getTotalDocs();
229
230	}
231	/** get the list of doc ids */
232	protected String [] getDocIDs(Object query_result) {
233	Vector docs = ((LuceneQueryResult)query_result).getDocs();
234	String [] doc_nums = new String [docs.size()];
235	for (int d = 0; d < docs.size(); d++) {
236	String doc_num = Long.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).num_);
237	doc_nums[d] = doc_num;
238	}
239	return doc_nums;
240	}
241	/** get the list of doc ranks */
242	protected String [] getDocRanks(Object query_result) {
243	Vector docs = ((LuceneQueryResult)query_result).getDocs();
244	String [] doc_ranks = new String [docs.size()];
245	for (int d = 0; d < docs.size(); d++) {
246	doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_);
247	}
248	return doc_ranks;
249	}
250	/** add in term info if available */
251	protected boolean addTermInfo(Element term_list, HashMap params,
252	Object query_result) {
253	String query_level = (String)params.get(LEVEL_PARAM); // the current query level
254
255	Vector terms = ((LuceneQueryResult)query_result).getTerms();
256	for (int t = 0; t < terms.size(); t++) {
257	LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t);
258
259	Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
260	term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
261	term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
262	term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
263	term_elem.setAttribute(FIELD_ATT, term_info.field_);
264	term_list.appendChild(term_elem);
265	}
266	return true;
267	}
268
269	protected String addFieldInfo(String query, String field) {
270	if (field.equals("") \|\| field.equals("ZZ")) {
271	return query;
272	}
273	return field+":("+query+")";
274	}
275
276	protected void addQueryElem(StringBuffer s, String q, String f, String c) {
277
278	String combine="";
279	if (s.length()>0) {
280	combine = " "+c+" ";
281	}
282	s.append(combine + addFieldInfo(q,f));
283	}
284
285	/** Lucene doesn't use these options at the moment */
286	protected String addStemOptions(String query, String stem,
287	String casef, String accent) {
288	return query;
289	}
290	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: