Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java@ 28966

Last change on this file since 28966 was 28966, checked in by kjdon, 10 years ago
Lots of changes, mainly to do with removing this.doc from everywhere, because Document is not thread safe. Now we tend to create a new Document every time we start a new page/message etc. In a service, this.desc_doc is available as the document for creating service info, but it should only be used for that and not for other messages. newDOM is now static for XMLConverter. Method parameter changes for some GSXML methods.
Property svn:keywords set to `Author Date Id Revision`
File size: 7.3 KB

Line
1	/*
2	* GS2LuceneSearch.java
3	* Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify it
6	* under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7	* (at your option) any later version.
8	*
9	* This program is distributed in the hope that it will be useful,
10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	* GNU General Public License for more details.
13	*
14	* You should have received a copy of the GNU General Public License
15	* along with this program; if not, write to the Free Software
16	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17	*/
18
19	package org.greenstone.gsdl3.service;
20
21	// Greenstone classes
22	import java.io.File;
23	import java.util.ArrayList;
24	import java.util.HashMap;
25	import java.util.Iterator;
26	import java.util.Map;
27	import java.util.Set;
28	import java.util.Vector;
29
30	import org.apache.log4j.Logger;
31	import org.greenstone.LuceneWrapper3.GS2LuceneQuery;
32	import org.greenstone.LuceneWrapper3.LuceneQueryResult;
33	import org.greenstone.gsdl3.util.FacetWrapper;
34	import org.greenstone.gsdl3.util.GSFile;
35	import org.greenstone.gsdl3.util.GSXML;
36	import org.greenstone.gsdl3.util.XMLConverter;
37
38	import org.w3c.dom.Document;
39	import org.w3c.dom.Element;
40
41	public class GS2LuceneSearch extends SharedSoleneGS2FieldSearch
42	{
43	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
44
45	private GS2LuceneQuery lucene_src = null;
46
47	public GS2LuceneSearch()
48	{
49	this.lucene_src = new GS2LuceneQuery();
50	}
51
52	public void cleanUp()
53	{
54	super.cleanUp();
55	this.lucene_src.cleanUp();
56	}
57
58	/** methods to handle actually doing the query */
59
60	/** do any initialisation of the query object */
61	protected boolean setUpQueryer(HashMap params)
62	{
63	String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index" + File.separatorChar;
64
65	String index = "didx";
66	String physical_index_language_name = null;
67	String physical_sub_index_name = null;
68	int maxdocs = 100;
69	int hits_per_page = 20;
70	int start_page = 1;
71	String sort_field = GS2LuceneQuery.SORT_RANK;
72	String sort_order = SORT_ORDER_ASCENDING;
73	// set up the query params
74	Set entries = params.entrySet();
75	Iterator i = entries.iterator();
76	while (i.hasNext())
77	{
78	Map.Entry m = (Map.Entry) i.next();
79	String name = (String) m.getKey();
80	String value = (String) m.getValue();
81
82	if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
83	{
84	maxdocs = Integer.parseInt(value);
85	}
86	else if (name.equals(HITS_PER_PAGE_PARAM))
87	{
88	hits_per_page = Integer.parseInt(value);
89	}
90	else if (name.equals(START_PAGE_PARAM))
91	{
92	start_page = Integer.parseInt(value);
93
94	}
95	else if (name.equals(MATCH_PARAM))
96	{
97	if (value.equals(MATCH_PARAM_ALL))
98	{
99	this.lucene_src.setDefaultConjunctionOperator("AND");
100	}
101	else
102	{
103	this.lucene_src.setDefaultConjunctionOperator("OR");
104	}
105	}
106	else if (name.equals(RANK_PARAM))
107	{
108	if (value.equals(RANK_PARAM_RANK))
109	{
110	value = GS2LuceneQuery.SORT_RANK;
111	} else if (value.equals(RANK_PARAM_NONE)) {
112	value = GS2LuceneQuery.SORT_NATURAL;
113	}
114	this.lucene_src.setSortField(value);
115	sort_field = value;
116	}
117	else if (name.equals(SORT_ORDER_PARAM)) {
118	sort_order = value;
119	}
120	else if (name.equals(LEVEL_PARAM))
121	{
122	if (value.toUpperCase().equals("SEC"))
123	{
124	index = "sidx";
125	}
126	else
127	{
128	index = "didx";
129	}
130	}
131	else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
132	{
133	physical_sub_index_name = value;
134	}
135	else if (name.equals(INDEX_LANGUAGE_PARAM))
136	{
137	physical_index_language_name = value;
138	} // ignore any others
139	}
140	// set up start and end results if necessary
141	int start_results = 1;
142	if (start_page != 1)
143	{
144	start_results = ((start_page - 1) * hits_per_page) + 1;
145	}
146	int end_results = hits_per_page * start_page;
147	this.lucene_src.setStartResults(start_results);
148	this.lucene_src.setEndResults(end_results);
149
150	if (index.equals("sidx") \|\| index.equals("didx"))
151	{
152	if (physical_sub_index_name != null)
153	{
154	index += physical_sub_index_name;
155	}
156	if (physical_index_language_name != null)
157	{
158	index += physical_index_language_name;
159	}
160	}
161
162	// default order for rank is descending, while for other
163	// fields it is ascending. So reverse_sort is different for
164	// the two cases.
165	if (sort_field.equals(GS2LuceneQuery.SORT_RANK)) {
166	if (sort_order.equals(SORT_ORDER_ASCENDING)) {
167	this.lucene_src.setReverseSort(true);
168	} else {
169	this.lucene_src.setReverseSort(false);
170	}
171	} else {
172	if (sort_order.equals(SORT_ORDER_DESCENDING)) {
173	this.lucene_src.setReverseSort(true);
174	} else {
175	this.lucene_src.setReverseSort(false);
176	}
177	}
178	this.lucene_src.setIndexDir(indexdir + index);
179	this.lucene_src.initialise();
180	return true;
181	}
182
183	/** do the query */
184	protected Object runQuery(String query)
185	{
186	try
187	{
188	LuceneQueryResult lqr = this.lucene_src.runQuery(query);
189	return lqr;
190	}
191	catch (Exception e)
192	{
193	logger.error("Exception happened in runQuery(): ", e);
194	}
195
196	return null;
197	}
198
199	/** get the total number of docs that match */
200	protected long numDocsMatched(Object query_result)
201	{
202	return ((LuceneQueryResult) query_result).getTotalDocs();
203	}
204
205	/** get the list of doc ids */
206	protected String[] getDocIDs(Object query_result)
207	{
208	Vector docs = ((LuceneQueryResult) query_result).getDocs();
209	String[] doc_nums = new String[docs.size()];
210	for (int d = 0; d < docs.size(); d++)
211	{
212	String doc_num = ((LuceneQueryResult.DocInfo) docs.elementAt(d)).id_;
213	doc_nums[d] = doc_num;
214	}
215	return doc_nums;
216	}
217
218	/** get the list of doc ranks */
219	protected String[] getDocRanks(Object query_result)
220	{
221	Vector docs = ((LuceneQueryResult) query_result).getDocs();
222	String[] doc_ranks = new String[docs.size()];
223	for (int d = 0; d < docs.size(); d++)
224	{
225	doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_);
226	}
227	return doc_ranks;
228	}
229
230	/** add in term info if available */
231	protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
232	{
233	Document doc = term_list.getOwnerDocument();
234	String query_level = (String) params.get(LEVEL_PARAM); // the current query level
235
236	Vector terms = ((LuceneQueryResult) query_result).getTerms();
237	for (int t = 0; t < terms.size(); t++)
238	{
239	LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t);
240
241	Element term_elem = doc.createElement(GSXML.TERM_ELEM);
242	term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
243	term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
244	term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
245	term_elem.setAttribute(FIELD_ATT, term_info.field_);
246	term_list.appendChild(term_elem);
247	}
248
249	Vector stopwords = ((LuceneQueryResult) query_result).getStopWords();
250	for (int t = 0; t < stopwords.size(); t++)
251	{
252	String stopword = (String) stopwords.get(t);
253
254	Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
255	stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
256	term_list.appendChild(stopword_elem);
257	}
258
259	return true;
260	}
261
262	protected ArrayList<FacetWrapper> getFacets(Object query_result)
263	{
264	return null;
265	}
266	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: