source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java@ 28966

Last change on this file since 28966 was 28966, checked in by kjdon, 10 years ago

Lots of changes. Mainly to do with removing this.doc from everywhere. Document is not thread safe. Now we tend to create a new Document every time we are starting a new page/message etc. In a service, this.desc_doc is available as the document to create service info stuff. But it should only be used for this and not for other messages. newDOM is now static for XMLConverter. Method param changes for some GSXML methods.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.3 KB
Line 
1/*
2 * GS2LuceneSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.io.File;
23import java.util.ArrayList;
24import java.util.HashMap;
25import java.util.Iterator;
26import java.util.Map;
27import java.util.Set;
28import java.util.Vector;
29
30import org.apache.log4j.Logger;
31import org.greenstone.LuceneWrapper3.GS2LuceneQuery;
32import org.greenstone.LuceneWrapper3.LuceneQueryResult;
33import org.greenstone.gsdl3.util.FacetWrapper;
34import org.greenstone.gsdl3.util.GSFile;
35import org.greenstone.gsdl3.util.GSXML;
36import org.greenstone.gsdl3.util.XMLConverter;
37
38import org.w3c.dom.Document;
39import org.w3c.dom.Element;
40
41public class GS2LuceneSearch extends SharedSoleneGS2FieldSearch
42{
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
44
45 private GS2LuceneQuery lucene_src = null;
46
47 public GS2LuceneSearch()
48 {
49 this.lucene_src = new GS2LuceneQuery();
50 }
51
52 public void cleanUp()
53 {
54 super.cleanUp();
55 this.lucene_src.cleanUp();
56 }
57
58 /** methods to handle actually doing the query */
59
60 /** do any initialisation of the query object */
61 protected boolean setUpQueryer(HashMap params)
62 {
63 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index" + File.separatorChar;
64
65 String index = "didx";
66 String physical_index_language_name = null;
67 String physical_sub_index_name = null;
68 int maxdocs = 100;
69 int hits_per_page = 20;
70 int start_page = 1;
71 String sort_field = GS2LuceneQuery.SORT_RANK;
72 String sort_order = SORT_ORDER_ASCENDING;
73 // set up the query params
74 Set entries = params.entrySet();
75 Iterator i = entries.iterator();
76 while (i.hasNext())
77 {
78 Map.Entry m = (Map.Entry) i.next();
79 String name = (String) m.getKey();
80 String value = (String) m.getValue();
81
82 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
83 {
84 maxdocs = Integer.parseInt(value);
85 }
86 else if (name.equals(HITS_PER_PAGE_PARAM))
87 {
88 hits_per_page = Integer.parseInt(value);
89 }
90 else if (name.equals(START_PAGE_PARAM))
91 {
92 start_page = Integer.parseInt(value);
93
94 }
95 else if (name.equals(MATCH_PARAM))
96 {
97 if (value.equals(MATCH_PARAM_ALL))
98 {
99 this.lucene_src.setDefaultConjunctionOperator("AND");
100 }
101 else
102 {
103 this.lucene_src.setDefaultConjunctionOperator("OR");
104 }
105 }
106 else if (name.equals(RANK_PARAM))
107 {
108 if (value.equals(RANK_PARAM_RANK))
109 {
110 value = GS2LuceneQuery.SORT_RANK;
111 } else if (value.equals(RANK_PARAM_NONE)) {
112 value = GS2LuceneQuery.SORT_NATURAL;
113 }
114 this.lucene_src.setSortField(value);
115 sort_field = value;
116 }
117 else if (name.equals(SORT_ORDER_PARAM)) {
118 sort_order = value;
119 }
120 else if (name.equals(LEVEL_PARAM))
121 {
122 if (value.toUpperCase().equals("SEC"))
123 {
124 index = "sidx";
125 }
126 else
127 {
128 index = "didx";
129 }
130 }
131 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
132 {
133 physical_sub_index_name = value;
134 }
135 else if (name.equals(INDEX_LANGUAGE_PARAM))
136 {
137 physical_index_language_name = value;
138 } // ignore any others
139 }
140 // set up start and end results if necessary
141 int start_results = 1;
142 if (start_page != 1)
143 {
144 start_results = ((start_page - 1) * hits_per_page) + 1;
145 }
146 int end_results = hits_per_page * start_page;
147 this.lucene_src.setStartResults(start_results);
148 this.lucene_src.setEndResults(end_results);
149
150 if (index.equals("sidx") || index.equals("didx"))
151 {
152 if (physical_sub_index_name != null)
153 {
154 index += physical_sub_index_name;
155 }
156 if (physical_index_language_name != null)
157 {
158 index += physical_index_language_name;
159 }
160 }
161
162 // default order for rank is descending, while for other
163 // fields it is ascending. So reverse_sort is different for
164 // the two cases.
165 if (sort_field.equals(GS2LuceneQuery.SORT_RANK)) {
166 if (sort_order.equals(SORT_ORDER_ASCENDING)) {
167 this.lucene_src.setReverseSort(true);
168 } else {
169 this.lucene_src.setReverseSort(false);
170 }
171 } else {
172 if (sort_order.equals(SORT_ORDER_DESCENDING)) {
173 this.lucene_src.setReverseSort(true);
174 } else {
175 this.lucene_src.setReverseSort(false);
176 }
177 }
178 this.lucene_src.setIndexDir(indexdir + index);
179 this.lucene_src.initialise();
180 return true;
181 }
182
183 /** do the query */
184 protected Object runQuery(String query)
185 {
186 try
187 {
188 LuceneQueryResult lqr = this.lucene_src.runQuery(query);
189 return lqr;
190 }
191 catch (Exception e)
192 {
193 logger.error("Exception happened in runQuery(): ", e);
194 }
195
196 return null;
197 }
198
199 /** get the total number of docs that match */
200 protected long numDocsMatched(Object query_result)
201 {
202 return ((LuceneQueryResult) query_result).getTotalDocs();
203 }
204
205 /** get the list of doc ids */
206 protected String[] getDocIDs(Object query_result)
207 {
208 Vector docs = ((LuceneQueryResult) query_result).getDocs();
209 String[] doc_nums = new String[docs.size()];
210 for (int d = 0; d < docs.size(); d++)
211 {
212 String doc_num = ((LuceneQueryResult.DocInfo) docs.elementAt(d)).id_;
213 doc_nums[d] = doc_num;
214 }
215 return doc_nums;
216 }
217
218 /** get the list of doc ranks */
219 protected String[] getDocRanks(Object query_result)
220 {
221 Vector docs = ((LuceneQueryResult) query_result).getDocs();
222 String[] doc_ranks = new String[docs.size()];
223 for (int d = 0; d < docs.size(); d++)
224 {
225 doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_);
226 }
227 return doc_ranks;
228 }
229
230 /** add in term info if available */
231 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
232 {
233 Document doc = term_list.getOwnerDocument();
234 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
235
236 Vector terms = ((LuceneQueryResult) query_result).getTerms();
237 for (int t = 0; t < terms.size(); t++)
238 {
239 LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t);
240
241 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
242 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
243 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
244 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
245 term_elem.setAttribute(FIELD_ATT, term_info.field_);
246 term_list.appendChild(term_elem);
247 }
248
249 Vector stopwords = ((LuceneQueryResult) query_result).getStopWords();
250 for (int t = 0; t < stopwords.size(); t++)
251 {
252 String stopword = (String) stopwords.get(t);
253
254 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
255 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
256 term_list.appendChild(stopword_elem);
257 }
258
259 return true;
260 }
261
262 protected ArrayList<FacetWrapper> getFacets(Object query_result)
263 {
264 return null;
265 }
266}
Note: See TracBrowser for help on using the repository browser.