Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java@ 13911

Last change on this file since 13911 was 13911, checked in by kjdon, 17 years ago
changed the format of index and field info in buildConfig and collectionConfig. No fields any more, just use indexes. index has a shortname and a name - name comes from collect.cfg, shortname from build.cfg, eg text and TX, or section:text and ste. mg/mgpp/lucene colls config files are more similar now
Property svn:keywords set to `Author Date Id Revision`
File size: 8.7 KB

Line
1	/*
2	* GS2LuceneSearch.java
3	* Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify it
6	* under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7	* (at your option) any later version.
8	*
9	* This program is distributed in the hope that it will be useful,
10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	* GNU General Public License for more details.
13	*
14	* You should have received a copy of the GNU General Public License
15	* along with this program; if not, write to the Free Software
16	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17	*/
18
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.util.*;
23
24	// XML classes
25	import org.w3c.dom.Element;
26	import org.w3c.dom.NodeList;
27	import org.w3c.dom.Document;
28	// java classes
29	import java.util.ArrayList;
30	import java.util.HashMap;
31	import java.io.File;
32	import java.util.Iterator;
33	import java.util.Set;
34	import java.util.Map;
35	import java.util.Vector;
36
37	// Logging
38	import org.apache.log4j.Logger;
39
40	import org.greenstone.LuceneWrapper.GS2LuceneQuery;
41	import org.greenstone.LuceneWrapper.LuceneQueryResult;
42
43	public class GS2LuceneSearch
44	extends AbstractGS2FieldSearch
45	{
46	protected static final String RANK_PARAM_RANK_VALUE = "rank";
47
48	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
49
50	private GS2LuceneQuery lucene_src=null;
51
52	public GS2LuceneSearch()
53	{
54	this.lucene_src = new GS2LuceneQuery();
55	// Lucene uses double operators, not single
56	AND_OPERATOR = "&&";
57	OR_OPERATOR = "\|\|";
58
59	does_paging = true;
60	does_chunking = true;
61	}
62
63	public void cleanUp() {
64	super.cleanUp();
65	this.lucene_src.cleanUp();
66	}
67
68	/** configure this service */
69	public boolean configure(Element info, Element extra_info)
70	{
71	if (!super.configure(info, extra_info)){
72	return false;
73	}
74
75	// Lucene doesn't do case folding or stemming or accent folding at the
76	// moment
77	does_case = false;
78	does_stem = false;
79	does_accent = false;
80
81	return true;
82	}
83
84	/** add in the lucene specific params to TextQuery */
85	protected void addCustomQueryParams(Element param_list, String lang)
86	{
87	super.addCustomQueryParams(param_list, lang);
88	/** lucenes rank param is based on index fields, not ranked/not */
89	createParameter(RANK_PARAM, param_list, lang);
90	}
91
92	/** create a param and add to the list */
93	/** we override this to do a special rank param */
94	protected void createParameter(String name, Element param_list, String lang)
95	{
96	Element param = null;
97	if (name.equals(RANK_PARAM)) {
98	// get the fields
99	ArrayList fields = new ArrayList();
100	fields.add(RANK_PARAM_RANK_VALUE);
101	ArrayList field_names = new ArrayList();
102	field_names.add(getTextString("param.sortBy.rank", lang));
103	getSortByIndexData(fields, field_names, lang);
104
105	param = GSXML.createParameterDescription2(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)fields.get(0), fields, field_names );
106	}
107	if (param != null) {
108	param_list.appendChild(param);
109	} else {
110	super.createParameter(name, param_list, lang);
111	}
112	}
113
114	protected void getSortByIndexData(ArrayList index_ids, ArrayList index_names, String lang) {
115	// the index info -
116	Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
117	NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
118	int len = indexes.getLength();
119	// now add even if there is only one
120	for (int i=0; i<len; i++) {
121	Element index = (Element)indexes.item(i);
122	String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
123	if (shortname.equals("") \|\| shortname.equals("ZZ") \|\| shortname.equals("TX")) {
124	continue;
125	}
126	index_ids.add("by"+shortname);
127	String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
128	if (display_name.equals("")) {
129	display_name = index.getAttribute(GSXML.NAME_ATT);
130	if (display_name.equals("")) {
131	display_name = shortname;
132	}
133	}
134	index_names.add(display_name);
135
136	}
137
138	}
139
140	/** methods to handle actually doing the query */
141
142	/** do any initialisation of the query object */
143	protected boolean setUpQueryer(HashMap params) {
144	String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index"+File.separatorChar;
145
146	String index = "didx";
147	int maxdocs = 100;
148	int hits_per_page = 20;
149	int start_page = 1;
150	// set up the query params
151	Set entries = params.entrySet();
152	Iterator i = entries.iterator();
153	while (i.hasNext()) {
154	Map.Entry m = (Map.Entry)i.next();
155	String name = (String)m.getKey();
156	String value = (String)m.getValue();
157
158	if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
159	maxdocs = Integer.parseInt(value);
160	} else if (name.equals(HITS_PER_PAGE_PARAM)) {
161	hits_per_page = Integer.parseInt(value);
162	} else if (name.equals(START_PAGE_PARAM)) {
163	start_page = Integer.parseInt(value);
164
165	} else if (name.equals(MATCH_PARAM)) {
166	if (value.equals(MATCH_PARAM_ALL)) {
167	this.lucene_src.setDefaultConjunctionOperator("AND");
168	} else{
169	this.lucene_src.setDefaultConjunctionOperator("OR");
170	}
171	} else if (name.equals(RANK_PARAM)) {
172	if (value.equals(RANK_PARAM_RANK_VALUE)) {
173	value = null;
174	}
175	this.lucene_src.setSortField(value);
176	} else if (name.equals(LEVEL_PARAM)) {
177	if (value.toUpperCase().equals("SEC")){
178	index = "sidx";
179	}
180	else {
181	index = "didx";
182	}
183	} // ignore any others
184	}
185	// set up start and end results if necessary
186	int start_results = 1;
187	if (start_page != 1) {
188	start_results = ((start_page-1) * hits_per_page) + 1;
189	}
190	int end_results = hits_per_page * start_page;
191	this.lucene_src.setStartResults(start_results);
192	this.lucene_src.setEndResults(end_results);
193
194	this.lucene_src.setIndexDir(indexdir+index);
195	this.lucene_src.initialise();
196	return true;
197	}
198	/** do the query */
199	protected Object runQuery(String query) {
200	try {
201	LuceneQueryResult lqr=this.lucene_src.runQuery(query);
202	return lqr;
203	} catch (Exception e) {
204	logger.error ("exception happened in run query: ", e);
205	}
206
207	return null;
208	}
209	/** get the total number of docs that match */
210	protected long numDocsMatched(Object query_result) {
211	return ((LuceneQueryResult)query_result).getTotalDocs();
212
213	}
214	/** get the list of doc ids */
215	protected String [] getDocIDs(Object query_result) {
216	Vector docs = ((LuceneQueryResult)query_result).getDocs();
217	String [] doc_nums = new String [docs.size()];
218	for (int d = 0; d < docs.size(); d++) {
219	String doc_num = Long.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).num_);
220	doc_nums[d] = doc_num;
221	}
222	return doc_nums;
223	}
224	/** get the list of doc ranks */
225	protected String [] getDocRanks(Object query_result) {
226	Vector docs = ((LuceneQueryResult)query_result).getDocs();
227	String [] doc_ranks = new String [docs.size()];
228	for (int d = 0; d < docs.size(); d++) {
229	doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_);
230	}
231	return doc_ranks;
232	}
233	/** add in term info if available */
234	protected boolean addTermInfo(Element term_list, HashMap params,
235	Object query_result) {
236	String query_level = (String)params.get(LEVEL_PARAM); // the current query level
237
238	Vector terms = ((LuceneQueryResult)query_result).getTerms();
239	for (int t = 0; t < terms.size(); t++) {
240	LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t);
241
242	Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
243	term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
244	term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
245	term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
246	term_elem.setAttribute(FIELD_ATT, term_info.field_);
247	term_list.appendChild(term_elem);
248	}
249	return true;
250	}
251
252	protected String addFieldInfo(String query, String field) {
253	if (field.equals("") \|\| field.equals("ZZ")) {
254	return query;
255	}
256	return field+":("+query+")";
257	}
258
259	protected void addQueryElem(StringBuffer s, String q, String f, String c) {
260
261	String combine="";
262	if (s.length()>0) {
263	combine = " "+c+" ";
264	}
265	s.append(combine + addFieldInfo(q,f));
266	}
267
268	/** Lucene doesn't use these options at the moment */
269	protected String addStemOptions(String query, String stem,
270	String casef, String accent) {
271	return query;
272	}
273	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: