source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java@ 18422

Last change on this file since 18422 was 18422, checked in by kjdon, 15 years ago

lucene uses greenstone ids as its internal ids now, so need to make dummy methods that don't do anything for internalNum2OID

  • Property svn:keywords set to Author Date Id Revision
File size: 9.6 KB
Line 
1/*
2 * GS2LuceneSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it under the terms of the
6 * GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.util.*;
23
24// XML classes
25import org.w3c.dom.Element;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28// java classes
29import java.util.ArrayList;
30import java.util.HashMap;
31import java.io.File;
32import java.util.Iterator;
33import java.util.Set;
34import java.util.Map;
35import java.util.Vector;
36
37// Logging
38import org.apache.log4j.Logger;
39
40import org.greenstone.LuceneWrapper.GS2LuceneQuery;
41import org.greenstone.LuceneWrapper.LuceneQueryResult;
42
43public class GS2LuceneSearch
44 extends AbstractGS2FieldSearch
45{
46 protected static final String RANK_PARAM_RANK_VALUE = "rank";
47
48 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
49
50 private GS2LuceneQuery lucene_src=null;
51
52 public GS2LuceneSearch()
53 {
54 this.lucene_src = new GS2LuceneQuery();
55 // Lucene uses double operators, not single
56 AND_OPERATOR = "&&";
57 OR_OPERATOR = "||";
58
59 does_paging = true;
60 does_chunking = true;
61 }
62
63 public void cleanUp() {
64 super.cleanUp();
65 this.lucene_src.cleanUp();
66 }
67
68 /** configure this service */
69 public boolean configure(Element info, Element extra_info)
70 {
71 if (!super.configure(info, extra_info)){
72 return false;
73 }
74
75 // Lucene doesn't do case folding or stemming or accent folding at the
76 // moment
77 does_case = false;
78 does_stem = false;
79 does_accent = false;
80
81 return true;
82 }
83
84 /** add in the lucene specific params to TextQuery */
85 protected void addCustomQueryParams(Element param_list, String lang)
86 {
87 super.addCustomQueryParams(param_list, lang);
88 /** lucenes rank param is based on index fields, not ranked/not */
89 createParameter(RANK_PARAM, param_list, lang);
90
91 }
92
93 /** create a param and add to the list */
94 /** we override this to do a special rank param */
95 protected void createParameter(String name, Element param_list, String lang)
96 {
97 Element param = null;
98 if (name.equals(RANK_PARAM)) {
99 // get the fields
100 ArrayList fields = new ArrayList();
101 fields.add(RANK_PARAM_RANK_VALUE);
102 ArrayList field_names = new ArrayList();
103 field_names.add(getTextString("param.sortBy.rank", lang));
104 getSortByIndexData(fields, field_names, lang);
105
106 param = GSXML.createParameterDescription2(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)fields.get(0), fields, field_names );
107 }
108 if (param != null) {
109 param_list.appendChild(param);
110 } else {
111 super.createParameter(name, param_list, lang);
112 }
113 }
114
115 protected void getSortByIndexData(ArrayList index_ids, ArrayList index_names, String lang) {
116 // the index info -
117 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
118 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
119 int len = indexes.getLength();
120 // now add even if there is only one
121 for (int i=0; i<len; i++) {
122 Element index = (Element)indexes.item(i);
123 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
124 if (shortname.equals("") || shortname.equals("ZZ") || shortname.equals("TX")) {
125 continue;
126 }
127 index_ids.add("by"+shortname);
128 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
129 if (display_name.equals("")) {
130 display_name = index.getAttribute(GSXML.NAME_ATT);
131 if (display_name.equals("")) {
132 display_name = shortname;
133 }
134 }
135 index_names.add(display_name);
136
137 }
138
139 }
140
141 /** methods to handle actually doing the query */
142
143 /** do any initialisation of the query object */
144 protected boolean setUpQueryer(HashMap params) {
145 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index"+File.separatorChar;
146
147 String index = "didx";
148 String physical_index_language_name=null;
149 String physical_sub_index_name=null;
150 int maxdocs = 100;
151 int hits_per_page = 20;
152 int start_page = 1;
153 // set up the query params
154 Set entries = params.entrySet();
155 Iterator i = entries.iterator();
156 while (i.hasNext()) {
157 Map.Entry m = (Map.Entry)i.next();
158 String name = (String)m.getKey();
159 String value = (String)m.getValue();
160
161 if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
162 maxdocs = Integer.parseInt(value);
163 } else if (name.equals(HITS_PER_PAGE_PARAM)) {
164 hits_per_page = Integer.parseInt(value);
165 } else if (name.equals(START_PAGE_PARAM)) {
166 start_page = Integer.parseInt(value);
167
168 } else if (name.equals(MATCH_PARAM)) {
169 if (value.equals(MATCH_PARAM_ALL)) {
170 this.lucene_src.setDefaultConjunctionOperator("AND");
171 } else{
172 this.lucene_src.setDefaultConjunctionOperator("OR");
173 }
174 } else if (name.equals(RANK_PARAM)) {
175 if (value.equals(RANK_PARAM_RANK_VALUE)) {
176 value = null;
177 }
178 this.lucene_src.setSortField(value);
179 } else if (name.equals(LEVEL_PARAM)) {
180 if (value.toUpperCase().equals("SEC")){
181 index = "sidx";
182 }
183 else {
184 index = "didx";
185 }
186 } else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) {
187 physical_sub_index_name=value;
188 } else if (name.equals(INDEX_LANGUAGE_PARAM)){
189 physical_index_language_name=value;
190 } // ignore any others
191 }
192 // set up start and end results if necessary
193 int start_results = 1;
194 if (start_page != 1) {
195 start_results = ((start_page-1) * hits_per_page) + 1;
196 }
197 int end_results = hits_per_page * start_page;
198 this.lucene_src.setStartResults(start_results);
199 this.lucene_src.setEndResults(end_results);
200
201
202 if (index.equals("sidx") || index.equals("didx")){
203 if (physical_sub_index_name!=null) {
204 index+=physical_sub_index_name;
205 }
206 if (physical_index_language_name!=null){
207 index+=physical_index_language_name;
208 }
209 }
210
211 this.lucene_src.setIndexDir(indexdir+index);
212 this.lucene_src.initialise();
213 return true;
214 }
215 /** do the query */
216 protected Object runQuery(String query) {
217 try {
218 LuceneQueryResult lqr=this.lucene_src.runQuery(query);
219 return lqr;
220 } catch (Exception e) {
221 logger.error ("exception happened in run query: ", e);
222 }
223
224 return null;
225 }
226 /** get the total number of docs that match */
227 protected long numDocsMatched(Object query_result) {
228 return ((LuceneQueryResult)query_result).getTotalDocs();
229
230 }
231 /** get the list of doc ids */
232 protected String [] getDocIDs(Object query_result) {
233 Vector docs = ((LuceneQueryResult)query_result).getDocs();
234 String [] doc_nums = new String [docs.size()];
235 for (int d = 0; d < docs.size(); d++) {
236 String doc_num = ((LuceneQueryResult.DocInfo) docs.elementAt(d)).id_;
237 doc_nums[d] = doc_num;
238 }
239 return doc_nums;
240 }
241 /** get the list of doc ranks */
242 protected String [] getDocRanks(Object query_result) {
243 Vector docs = ((LuceneQueryResult)query_result).getDocs();
244 String [] doc_ranks = new String [docs.size()];
245 for (int d = 0; d < docs.size(); d++) {
246 doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_);
247 }
248 return doc_ranks;
249 }
250 /** add in term info if available */
251 protected boolean addTermInfo(Element term_list, HashMap params,
252 Object query_result) {
253 String query_level = (String)params.get(LEVEL_PARAM); // the current query level
254
255 Vector terms = ((LuceneQueryResult)query_result).getTerms();
256 for (int t = 0; t < terms.size(); t++) {
257 LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t);
258
259 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
260 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
261 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
262 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
263 term_elem.setAttribute(FIELD_ATT, term_info.field_);
264 term_list.appendChild(term_elem);
265 }
266 return true;
267 }
268
269 protected String addFieldInfo(String query, String field) {
270 if (field.equals("") || field.equals("ZZ")) {
271 return query;
272 }
273 return field+":("+query+")";
274 }
275
276 protected void addQueryElem(StringBuffer s, String q, String f, String c) {
277
278 String combine="";
279 if (s.length()>0) {
280 combine = " "+c+" ";
281 }
282 s.append(combine + addFieldInfo(q,f));
283 }
284
285 /** Lucene doesn't use these options at the moment */
286 protected String addStemOptions(String query, String stem,
287 String casef, String accent) {
288 return query;
289 }
290
291 /** Lucene no longer uses internal ids. It just uses hash ids. So we need
292 to override these methods so no conversion is done. */
293 /** convert indexer internal id to Greenstone oid */
294 protected String internalNum2OID(long docnum)
295 {
296 return Long.toString(docnum);
297
298 }
299 protected String internalNum2OID(String docnum)
300 {
301 return docnum;
302
303 }
304
305}
Note: See TracBrowser for help on using the repository browser.