source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java@ 14005

Last change on this file since 14005 was 14005, checked in by qq6, 17 years ago

change didx to sidx

  • Property svn:keywords set to Author Date Id Revision
File size: 9.2 KB
Line 
1/*
2 * GS2LuceneSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.util.*;
23
24// XML classes
25import org.w3c.dom.Element;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28// java classes
29import java.util.ArrayList;
30import java.util.HashMap;
31import java.io.File;
32import java.util.Iterator;
33import java.util.Set;
34import java.util.Map;
35import java.util.Vector;
36
37// Logging
38import org.apache.log4j.Logger;
39
40import org.greenstone.LuceneWrapper.GS2LuceneQuery;
41import org.greenstone.LuceneWrapper.LuceneQueryResult;
42
43public class GS2LuceneSearch
44 extends AbstractGS2FieldSearch
45{
46 protected static final String RANK_PARAM_RANK_VALUE = "rank";
47
48 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
49
50 private GS2LuceneQuery lucene_src=null;
51
52 public GS2LuceneSearch()
53 {
54 this.lucene_src = new GS2LuceneQuery();
55 // Lucene uses double operators, not single
56 AND_OPERATOR = "&&";
57 OR_OPERATOR = "||";
58
59 does_paging = true;
60 does_chunking = true;
61 }
62
63 public void cleanUp() {
64 super.cleanUp();
65 this.lucene_src.cleanUp();
66 }
67
68 /** configure this service */
69 public boolean configure(Element info, Element extra_info)
70 {
71 if (!super.configure(info, extra_info)){
72 return false;
73 }
74
75 // Lucene doesn't do case folding or stemming or accent folding at the
76 // moment
77 does_case = false;
78 does_stem = false;
79 does_accent = false;
80
81 return true;
82 }
83
84 /** add in the lucene specific params to TextQuery */
85 protected void addCustomQueryParams(Element param_list, String lang)
86 {
87 super.addCustomQueryParams(param_list, lang);
88 /** lucenes rank param is based on index fields, not ranked/not */
89 createParameter(RANK_PARAM, param_list, lang);
90
91 }
92
93 /** create a param and add to the list */
94 /** we override this to do a special rank param */
95 protected void createParameter(String name, Element param_list, String lang)
96 {
97 Element param = null;
98 if (name.equals(RANK_PARAM)) {
99 // get the fields
100 ArrayList fields = new ArrayList();
101 fields.add(RANK_PARAM_RANK_VALUE);
102 ArrayList field_names = new ArrayList();
103 field_names.add(getTextString("param.sortBy.rank", lang));
104 getSortByIndexData(fields, field_names, lang);
105
106 param = GSXML.createParameterDescription2(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)fields.get(0), fields, field_names );
107 }
108 if (param != null) {
109 param_list.appendChild(param);
110 } else {
111 super.createParameter(name, param_list, lang);
112 }
113 }
114
115 protected void getSortByIndexData(ArrayList index_ids, ArrayList index_names, String lang) {
116 // the index info -
117 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
118 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
119 int len = indexes.getLength();
120 // now add even if there is only one
121 for (int i=0; i<len; i++) {
122 Element index = (Element)indexes.item(i);
123 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
124 if (shortname.equals("") || shortname.equals("ZZ") || shortname.equals("TX")) {
125 continue;
126 }
127 index_ids.add("by"+shortname);
128 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
129 if (display_name.equals("")) {
130 display_name = index.getAttribute(GSXML.NAME_ATT);
131 if (display_name.equals("")) {
132 display_name = shortname;
133 }
134 }
135 index_names.add(display_name);
136
137 }
138
139 }
140
141 /** methods to handle actually doing the query */
142
143 /** do any initialisation of the query object */
144 protected boolean setUpQueryer(HashMap params) {
145 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index"+File.separatorChar;
146
147 String index = "didx";
148 String physical_index_language_name=null;
149 String physical_sub_index_name=null;
150 int maxdocs = 100;
151 int hits_per_page = 20;
152 int start_page = 1;
153 // set up the query params
154 Set entries = params.entrySet();
155 Iterator i = entries.iterator();
156 while (i.hasNext()) {
157 Map.Entry m = (Map.Entry)i.next();
158 String name = (String)m.getKey();
159 String value = (String)m.getValue();
160
161 if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
162 maxdocs = Integer.parseInt(value);
163 } else if (name.equals(HITS_PER_PAGE_PARAM)) {
164 hits_per_page = Integer.parseInt(value);
165 } else if (name.equals(START_PAGE_PARAM)) {
166 start_page = Integer.parseInt(value);
167
168 } else if (name.equals(MATCH_PARAM)) {
169 if (value.equals(MATCH_PARAM_ALL)) {
170 this.lucene_src.setDefaultConjunctionOperator("AND");
171 } else{
172 this.lucene_src.setDefaultConjunctionOperator("OR");
173 }
174 } else if (name.equals(RANK_PARAM)) {
175 if (value.equals(RANK_PARAM_RANK_VALUE)) {
176 value = null;
177 }
178 this.lucene_src.setSortField(value);
179 } else if (name.equals(LEVEL_PARAM)) {
180 if (value.toUpperCase().equals("SEC")){
181 index = "sidx";
182 }
183 else {
184 index = "didx";
185 }
186 } else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) {
187 physical_sub_index_name=value;
188 } else if (name.equals(INDEX_LANGUAGE_PARAM)){
189 physical_index_language_name=value;
190 } // ignore any others
191 }
192 // set up start and end results if necessary
193 int start_results = 1;
194 if (start_page != 1) {
195 start_results = ((start_page-1) * hits_per_page) + 1;
196 }
197 int end_results = hits_per_page * start_page;
198 this.lucene_src.setStartResults(start_results);
199 this.lucene_src.setEndResults(end_results);
200
201
202 if (index.equals("sidx") || index.equals("didx")){
203 if (physical_sub_index_name!=null) {
204 index+=physical_sub_index_name;
205 }
206 if (physical_index_language_name!=null){
207 index+=physical_index_language_name;
208 }
209 }
210
211 this.lucene_src.setIndexDir(indexdir+index);
212 this.lucene_src.initialise();
213 return true;
214 }
215 /** do the query */
216 protected Object runQuery(String query) {
217 try {
218 LuceneQueryResult lqr=this.lucene_src.runQuery(query);
219 return lqr;
220 } catch (Exception e) {
221 logger.error ("exception happened in run query: ", e);
222 }
223
224 return null;
225 }
226 /** get the total number of docs that match */
227 protected long numDocsMatched(Object query_result) {
228 return ((LuceneQueryResult)query_result).getTotalDocs();
229
230 }
231 /** get the list of doc ids */
232 protected String [] getDocIDs(Object query_result) {
233 Vector docs = ((LuceneQueryResult)query_result).getDocs();
234 String [] doc_nums = new String [docs.size()];
235 for (int d = 0; d < docs.size(); d++) {
236 String doc_num = Long.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).num_);
237 doc_nums[d] = doc_num;
238 }
239 return doc_nums;
240 }
241 /** get the list of doc ranks */
242 protected String [] getDocRanks(Object query_result) {
243 Vector docs = ((LuceneQueryResult)query_result).getDocs();
244 String [] doc_ranks = new String [docs.size()];
245 for (int d = 0; d < docs.size(); d++) {
246 doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_);
247 }
248 return doc_ranks;
249 }
250 /** add in term info if available */
251 protected boolean addTermInfo(Element term_list, HashMap params,
252 Object query_result) {
253 String query_level = (String)params.get(LEVEL_PARAM); // the current query level
254
255 Vector terms = ((LuceneQueryResult)query_result).getTerms();
256 for (int t = 0; t < terms.size(); t++) {
257 LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t);
258
259 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
260 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
261 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
262 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
263 term_elem.setAttribute(FIELD_ATT, term_info.field_);
264 term_list.appendChild(term_elem);
265 }
266 return true;
267 }
268
269 protected String addFieldInfo(String query, String field) {
270 if (field.equals("") || field.equals("ZZ")) {
271 return query;
272 }
273 return field+":("+query+")";
274 }
275
276 protected void addQueryElem(StringBuffer s, String q, String f, String c) {
277
278 String combine="";
279 if (s.length()>0) {
280 combine = " "+c+" ";
281 }
282 s.append(combine + addFieldInfo(q,f));
283 }
284
285 /** Lucene doesn't use these options at the moment */
286 protected String addStemOptions(String query, String stem,
287 String casef, String accent) {
288 return query;
289 }
290}
Note: See TracBrowser for help on using the repository browser.