source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java@ 29428

Last change on this file since 29428 was 29428, checked in by kjdon, 9 years ago

use paramDefaults for setting up queryer, and default_sort

  • Property svn:keywords set to Author Date Id Revision
File size: 7.5 KB
Line 
1/*
2 * GS2LuceneSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.io.File;
23import java.util.ArrayList;
24import java.util.HashMap;
25import java.util.Iterator;
26import java.util.Map;
27import java.util.Set;
28import java.util.Vector;
29
30import org.apache.log4j.Logger;
31import org.greenstone.LuceneWrapper4.GS2LuceneQuery;
32import org.greenstone.LuceneWrapper4.LuceneQueryResult;
33import org.greenstone.gsdl3.util.FacetWrapper;
34import org.greenstone.gsdl3.util.GSFile;
35import org.greenstone.gsdl3.util.GSXML;
36import org.greenstone.gsdl3.util.XMLConverter;
37
38import org.w3c.dom.Document;
39import org.w3c.dom.Element;
40
41public class GS2LuceneSearch extends SharedSoleneGS2FieldSearch
42{
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
44
45 private GS2LuceneQuery lucene_src = null;
46
47 public GS2LuceneSearch()
48 {
49 this.lucene_src = new GS2LuceneQuery();
50 }
51
52 public void cleanUp()
53 {
54 super.cleanUp();
55 this.lucene_src.cleanUp();
56 }
57
58 /** methods to handle actually doing the query */
59
60 /** do any initialisation of the query object */
61 protected boolean setUpQueryer(HashMap params)
62 {
63 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index" + File.separatorChar;
64
65 String index = "didx";
66 String physical_index_language_name = null;
67 String physical_sub_index_name = null;
68 int maxdocs = Integer.parseInt(paramDefaults.get(MAXDOCS_PARAM));
69 int hits_per_page = Integer.parseInt(paramDefaults.get(HITS_PER_PAGE_PARAM));
70 int start_page = Integer.parseInt(paramDefaults.get(START_PAGE_PARAM));
71 String sort_field = getLuceneSort(default_sort);
72 String sort_order = paramDefaults.get(SORT_ORDER_PARAM);
73 // set up the query params
74 Set entries = params.entrySet();
75 Iterator i = entries.iterator();
76 while (i.hasNext())
77 {
78 Map.Entry m = (Map.Entry) i.next();
79 String name = (String) m.getKey();
80 String value = (String) m.getValue();
81
82 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
83 {
84 maxdocs = Integer.parseInt(value);
85 }
86 else if (name.equals(HITS_PER_PAGE_PARAM))
87 {
88 hits_per_page = Integer.parseInt(value);
89 }
90 else if (name.equals(START_PAGE_PARAM))
91 {
92 start_page = Integer.parseInt(value);
93
94 }
95 else if (name.equals(MATCH_PARAM))
96 {
97 if (value.equals(MATCH_PARAM_ALL))
98 {
99 this.lucene_src.setDefaultConjunctionOperator("AND");
100 }
101 else
102 {
103 this.lucene_src.setDefaultConjunctionOperator("OR");
104 }
105 }
106 else if (name.equals(RANK_PARAM))
107 {
108 sort_field = getLuceneSort(value);
109 this.lucene_src.setSortField(sort_field);
110
111 }
112 else if (name.equals(SORT_ORDER_PARAM)) {
113 sort_order = value;
114 }
115 else if (name.equals(LEVEL_PARAM))
116 {
117 if (value.toUpperCase().equals("SEC"))
118 {
119 index = "sidx";
120 }
121 else
122 {
123 index = "didx";
124 }
125 }
126 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
127 {
128 physical_sub_index_name = value;
129 }
130 else if (name.equals(INDEX_LANGUAGE_PARAM))
131 {
132 physical_index_language_name = value;
133 } // ignore any others
134 }
135 // set up start and end results if necessary
136 int start_results = 1;
137 if (start_page != 1)
138 {
139 start_results = ((start_page - 1) * hits_per_page) + 1;
140 }
141 int end_results = hits_per_page * start_page;
142 this.lucene_src.setStartResults(start_results);
143 this.lucene_src.setEndResults(end_results);
144
145 if (index.equals("sidx") || index.equals("didx"))
146 {
147 if (physical_sub_index_name != null)
148 {
149 index += physical_sub_index_name;
150 }
151 if (physical_index_language_name != null)
152 {
153 index += physical_index_language_name;
154 }
155 }
156
157 // default order for rank is descending, while for other
158 // fields it is ascending. So reverse_sort is different for
159 // the two cases.
160 if (sort_field.equals(GS2LuceneQuery.SORT_RANK)) {
161 if (sort_order.equals(SORT_ORDER_ASCENDING)) {
162 this.lucene_src.setReverseSort(true);
163 } else {
164 this.lucene_src.setReverseSort(false);
165 }
166 } else {
167 if (sort_order.equals(SORT_ORDER_DESCENDING)) {
168 this.lucene_src.setReverseSort(true);
169 } else {
170 this.lucene_src.setReverseSort(false);
171 }
172 }
173 this.lucene_src.setIndexDir(indexdir + index);
174 this.lucene_src.initialise();
175 return true;
176 }
177
178 /** do the query */
179 protected Object runQuery(String query)
180 {
181 try
182 {
183 LuceneQueryResult lqr = this.lucene_src.runQuery(query);
184 return lqr;
185 }
186 catch (Exception e)
187 {
188 logger.error("Exception happened in runQuery(): ", e);
189 }
190
191 return null;
192 }
193
194 /** get the total number of docs that match */
195 protected long numDocsMatched(Object query_result)
196 {
197 return ((LuceneQueryResult) query_result).getTotalDocs();
198 }
199
200 /** get the list of doc ids */
201 protected String[] getDocIDs(Object query_result)
202 {
203 Vector docs = ((LuceneQueryResult) query_result).getDocs();
204 String[] doc_nums = new String[docs.size()];
205 for (int d = 0; d < docs.size(); d++)
206 {
207 String doc_num = ((LuceneQueryResult.DocInfo) docs.elementAt(d)).id_;
208 doc_nums[d] = doc_num;
209 }
210 return doc_nums;
211 }
212
213 /** get the list of doc ranks */
214 protected String[] getDocRanks(Object query_result)
215 {
216 Vector docs = ((LuceneQueryResult) query_result).getDocs();
217 String[] doc_ranks = new String[docs.size()];
218 for (int d = 0; d < docs.size(); d++)
219 {
220 doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_);
221 }
222 return doc_ranks;
223 }
224
225 /** add in term info if available */
226 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
227 {
228 Document doc = term_list.getOwnerDocument();
229 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
230
231 Vector terms = ((LuceneQueryResult) query_result).getTerms();
232 for (int t = 0; t < terms.size(); t++)
233 {
234 LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t);
235
236 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
237 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
238 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
239 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
240 term_elem.setAttribute(FIELD_ATT, term_info.field_);
241 term_list.appendChild(term_elem);
242 }
243
244 Vector stopwords = ((LuceneQueryResult) query_result).getStopWords();
245 for (int t = 0; t < stopwords.size(); t++)
246 {
247 String stopword = (String) stopwords.get(t);
248
249 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
250 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
251 term_list.appendChild(stopword_elem);
252 }
253
254 return true;
255 }
256
257 protected ArrayList<FacetWrapper> getFacets(Object query_result)
258 {
259 return null;
260 }
261
262 protected String getLuceneSort(String gs3_sort) {
263
264 if (gs3_sort.equals(RANK_PARAM_RANK)) {
265 return GS2LuceneQuery.SORT_RANK;
266 }
267 if (gs3_sort.equals(RANK_PARAM_NONE)) {
268 return GS2LuceneQuery.SORT_NATURAL;
269 }
270 return gs3_sort;
271 }
272}
Note: See TracBrowser for help on using the repository browser.