source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java@ 13911

Last change on this file since 13911 was 13911, checked in by kjdon, 17 years ago

Changed the format of the index and field info in buildConfig and collectionConfig. There are no fields any more; only indexes are used. Each index has a shortname and a name: the name comes from collect.cfg and the shortname from build.cfg, e.g. text and TX, or section:text and ste. The config files of mg/mgpp/lucene collections are more similar now.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.7 KB
Line 
1/*
2 * GS2LuceneSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.util.*;
23
24// XML classes
25import org.w3c.dom.Element;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28// java classes
29import java.util.ArrayList;
30import java.util.HashMap;
31import java.io.File;
32import java.util.Iterator;
33import java.util.Set;
34import java.util.Map;
35import java.util.Vector;
36
37// Logging
38import org.apache.log4j.Logger;
39
40import org.greenstone.LuceneWrapper.GS2LuceneQuery;
41import org.greenstone.LuceneWrapper.LuceneQueryResult;
42
43public class GS2LuceneSearch
44 extends AbstractGS2FieldSearch
45{
/** Value of the rank parameter offered first in the rank enum; setUpQueryer passes a null sort field to the query object when this value is selected. */
46 protected static final String RANK_PARAM_RANK_VALUE = "rank";
47
/** log4j logger named after this class. */
48 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
49
/** The Lucene query object: created in the constructor, configured in setUpQueryer, run in runQuery and released in cleanUp. */
50 private GS2LuceneQuery lucene_src=null;
51
/**
 * Creates the service: switches on the paging and chunking flags, installs
 * the operator strings used for Lucene queries and creates the query object.
 */
public GS2LuceneSearch()
{
    // paging and chunking flags are both switched on for this service
    does_chunking = true;
    does_paging = true;

    // Lucene uses double operators, not single
    OR_OPERATOR = "||";
    AND_OPERATOR = "&&";

    this.lucene_src = new GS2LuceneQuery();
}
62
63 public void cleanUp() {
64 super.cleanUp();
65 this.lucene_src.cleanUp();
66 }
67
68 /** configure this service */
69 public boolean configure(Element info, Element extra_info)
70 {
71 if (!super.configure(info, extra_info)){
72 return false;
73 }
74
75 // Lucene doesn't do case folding or stemming or accent folding at the
76 // moment
77 does_case = false;
78 does_stem = false;
79 does_accent = false;
80
81 return true;
82 }
83
84 /** add in the lucene specific params to TextQuery */
85 protected void addCustomQueryParams(Element param_list, String lang)
86 {
87 super.addCustomQueryParams(param_list, lang);
88 /** lucenes rank param is based on index fields, not ranked/not */
89 createParameter(RANK_PARAM, param_list, lang);
90 }
91
92 /** create a param and add to the list */
93 /** we override this to do a special rank param */
94 protected void createParameter(String name, Element param_list, String lang)
95 {
96 Element param = null;
97 if (name.equals(RANK_PARAM)) {
98 // get the fields
99 ArrayList fields = new ArrayList();
100 fields.add(RANK_PARAM_RANK_VALUE);
101 ArrayList field_names = new ArrayList();
102 field_names.add(getTextString("param.sortBy.rank", lang));
103 getSortByIndexData(fields, field_names, lang);
104
105 param = GSXML.createParameterDescription2(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)fields.get(0), fields, field_names );
106 }
107 if (param != null) {
108 param_list.appendChild(param);
109 } else {
110 super.createParameter(name, param_list, lang);
111 }
112 }
113
114 protected void getSortByIndexData(ArrayList index_ids, ArrayList index_names, String lang) {
115 // the index info -
116 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
117 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
118 int len = indexes.getLength();
119 // now add even if there is only one
120 for (int i=0; i<len; i++) {
121 Element index = (Element)indexes.item(i);
122 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
123 if (shortname.equals("") || shortname.equals("ZZ") || shortname.equals("TX")) {
124 continue;
125 }
126 index_ids.add("by"+shortname);
127 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
128 if (display_name.equals("")) {
129 display_name = index.getAttribute(GSXML.NAME_ATT);
130 if (display_name.equals("")) {
131 display_name = shortname;
132 }
133 }
134 index_names.add(display_name);
135
136 }
137
138 }
139
140 /** methods to handle actually doing the query */
141
142 /** do any initialisation of the query object */
143 protected boolean setUpQueryer(HashMap params) {
144 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index"+File.separatorChar;
145
146 String index = "didx";
147 int maxdocs = 100;
148 int hits_per_page = 20;
149 int start_page = 1;
150 // set up the query params
151 Set entries = params.entrySet();
152 Iterator i = entries.iterator();
153 while (i.hasNext()) {
154 Map.Entry m = (Map.Entry)i.next();
155 String name = (String)m.getKey();
156 String value = (String)m.getValue();
157
158 if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
159 maxdocs = Integer.parseInt(value);
160 } else if (name.equals(HITS_PER_PAGE_PARAM)) {
161 hits_per_page = Integer.parseInt(value);
162 } else if (name.equals(START_PAGE_PARAM)) {
163 start_page = Integer.parseInt(value);
164
165 } else if (name.equals(MATCH_PARAM)) {
166 if (value.equals(MATCH_PARAM_ALL)) {
167 this.lucene_src.setDefaultConjunctionOperator("AND");
168 } else{
169 this.lucene_src.setDefaultConjunctionOperator("OR");
170 }
171 } else if (name.equals(RANK_PARAM)) {
172 if (value.equals(RANK_PARAM_RANK_VALUE)) {
173 value = null;
174 }
175 this.lucene_src.setSortField(value);
176 } else if (name.equals(LEVEL_PARAM)) {
177 if (value.toUpperCase().equals("SEC")){
178 index = "sidx";
179 }
180 else {
181 index = "didx";
182 }
183 } // ignore any others
184 }
185 // set up start and end results if necessary
186 int start_results = 1;
187 if (start_page != 1) {
188 start_results = ((start_page-1) * hits_per_page) + 1;
189 }
190 int end_results = hits_per_page * start_page;
191 this.lucene_src.setStartResults(start_results);
192 this.lucene_src.setEndResults(end_results);
193
194 this.lucene_src.setIndexDir(indexdir+index);
195 this.lucene_src.initialise();
196 return true;
197 }
198 /** do the query */
199 protected Object runQuery(String query) {
200 try {
201 LuceneQueryResult lqr=this.lucene_src.runQuery(query);
202 return lqr;
203 } catch (Exception e) {
204 logger.error ("exception happened in run query: ", e);
205 }
206
207 return null;
208 }
209 /** get the total number of docs that match */
210 protected long numDocsMatched(Object query_result) {
211 return ((LuceneQueryResult)query_result).getTotalDocs();
212
213 }
214 /** get the list of doc ids */
215 protected String [] getDocIDs(Object query_result) {
216 Vector docs = ((LuceneQueryResult)query_result).getDocs();
217 String [] doc_nums = new String [docs.size()];
218 for (int d = 0; d < docs.size(); d++) {
219 String doc_num = Long.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).num_);
220 doc_nums[d] = doc_num;
221 }
222 return doc_nums;
223 }
224 /** get the list of doc ranks */
225 protected String [] getDocRanks(Object query_result) {
226 Vector docs = ((LuceneQueryResult)query_result).getDocs();
227 String [] doc_ranks = new String [docs.size()];
228 for (int d = 0; d < docs.size(); d++) {
229 doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_);
230 }
231 return doc_ranks;
232 }
233 /** add in term info if available */
234 protected boolean addTermInfo(Element term_list, HashMap params,
235 Object query_result) {
236 String query_level = (String)params.get(LEVEL_PARAM); // the current query level
237
238 Vector terms = ((LuceneQueryResult)query_result).getTerms();
239 for (int t = 0; t < terms.size(); t++) {
240 LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t);
241
242 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
243 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
244 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
245 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
246 term_elem.setAttribute(FIELD_ATT, term_info.field_);
247 term_list.appendChild(term_elem);
248 }
249 return true;
250 }
251
252 protected String addFieldInfo(String query, String field) {
253 if (field.equals("") || field.equals("ZZ")) {
254 return query;
255 }
256 return field+":("+query+")";
257 }
258
259 protected void addQueryElem(StringBuffer s, String q, String f, String c) {
260
261 String combine="";
262 if (s.length()>0) {
263 combine = " "+c+" ";
264 }
265 s.append(combine + addFieldInfo(q,f));
266 }
267
268 /** Lucene doesn't use these options at the moment */
269 protected String addStemOptions(String query, String stem,
270 String casef, String accent) {
271 return query;
272 }
273}
Note: See TracBrowser for help on using the repository browser.