source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java@ 24738

Last change on this file since 24738 was 24738, checked in by davidb, 13 years ago

Shifted over to using LuceneWrapper3

  • Property svn:keywords set to Author Date Id Revision
File size: 6.2 KB
Line 
1/*
2* GS2LuceneSearch.java
3* Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4*
5* This program is free software; you can redistribute it and/or modify it under the terms
6* of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7* (at your option) any later version.
8*
9* This program is distributed in the hope that it will be useful,
10* but WITHOUT ANY WARRANTY; without even the implied warranty of
11* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12* GNU General Public License for more details.
13*
14* You should have received a copy of the GNU General Public License
15* along with this program; if not, write to the Free Software
16* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17*/
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.util.*;
23
24// XML classes
25import org.w3c.dom.Element;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.Document;
28// java classes
29import java.util.ArrayList;
30import java.util.HashMap;
31import java.io.File;
32import java.util.Iterator;
33import java.util.Set;
34import java.util.Map;
35import java.util.Vector;
36
37// Logging
38import org.apache.log4j.Logger;
39
40import org.greenstone.LuceneWrapper3.GS2LuceneQuery;
41import org.greenstone.LuceneWrapper3.LuceneQueryResult;
42
43public class GS2LuceneSearch extends SharedSoleneGS2FieldSearch
44{
45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
46
47 private GS2LuceneQuery lucene_src=null;
48
49 public GS2LuceneSearch()
50 {
51 this.lucene_src = new GS2LuceneQuery();
52 }
53
54
55 public void cleanUp() {
56 super.cleanUp();
57 this.lucene_src.cleanUp();
58 }
59
60
61 /** methods to handle actually doing the query */
62
63 /** do any initialisation of the query object */
64 protected boolean setUpQueryer(HashMap params) {
65 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index"+File.separatorChar;
66
67 String index = "didx";
68 String physical_index_language_name=null;
69 String physical_sub_index_name=null;
70 int maxdocs = 100;
71 int hits_per_page = 20;
72 int start_page = 1;
73 // set up the query params
74 Set entries = params.entrySet();
75 Iterator i = entries.iterator();
76 while (i.hasNext()) {
77 Map.Entry m = (Map.Entry)i.next();
78 String name = (String)m.getKey();
79 String value = (String)m.getValue();
80
81 if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
82 maxdocs = Integer.parseInt(value);
83 } else if (name.equals(HITS_PER_PAGE_PARAM)) {
84 hits_per_page = Integer.parseInt(value);
85 } else if (name.equals(START_PAGE_PARAM)) {
86 start_page = Integer.parseInt(value);
87
88 } else if (name.equals(MATCH_PARAM)) {
89 if (value.equals(MATCH_PARAM_ALL)) {
90 this.lucene_src.setDefaultConjunctionOperator("AND");
91 } else{
92 this.lucene_src.setDefaultConjunctionOperator("OR");
93 }
94 } else if (name.equals(RANK_PARAM)) {
95 if (value.equals(RANK_PARAM_RANK_VALUE)) {
96 value = null;
97 }
98 this.lucene_src.setSortField(value);
99 } else if (name.equals(LEVEL_PARAM)) {
100 if (value.toUpperCase().equals("SEC")){
101 index = "sidx";
102 }
103 else {
104 index = "didx";
105 }
106 } else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) {
107 physical_sub_index_name=value;
108 } else if (name.equals(INDEX_LANGUAGE_PARAM)){
109 physical_index_language_name=value;
110 } // ignore any others
111 }
112 // set up start and end results if necessary
113 int start_results = 1;
114 if (start_page != 1) {
115 start_results = ((start_page-1) * hits_per_page) + 1;
116 }
117 int end_results = hits_per_page * start_page;
118 this.lucene_src.setStartResults(start_results);
119 this.lucene_src.setEndResults(end_results);
120
121 if (index.equals("sidx") || index.equals("didx")){
122 if (physical_sub_index_name!=null) {
123 index+=physical_sub_index_name;
124 }
125 if (physical_index_language_name!=null){
126 index+=physical_index_language_name;
127 }
128 }
129
130 this.lucene_src.setIndexDir(indexdir+index);
131 this.lucene_src.initialise();
132 return true;
133 }
134
135 /** do the query */
136 protected Object runQuery(String query) {
137 try {
138 LuceneQueryResult lqr=this.lucene_src.runQuery(query);
139 return lqr;
140 } catch (Exception e) {
141 logger.error ("Exception happened in runQuery(): ", e);
142 }
143
144 return null;
145 }
146
147 /** get the total number of docs that match */
148 protected long numDocsMatched(Object query_result) {
149 return ((LuceneQueryResult)query_result).getTotalDocs();
150
151 }
152
153 /** get the list of doc ids */
154 protected String [] getDocIDs(Object query_result) {
155 Vector docs = ((LuceneQueryResult)query_result).getDocs();
156 String [] doc_nums = new String [docs.size()];
157 for (int d = 0; d < docs.size(); d++) {
158 String doc_num = ((LuceneQueryResult.DocInfo) docs.elementAt(d)).id_;
159 doc_nums[d] = doc_num;
160 }
161 return doc_nums;
162 }
163
164 /** get the list of doc ranks */
165 protected String [] getDocRanks(Object query_result) {
166 Vector docs = ((LuceneQueryResult)query_result).getDocs();
167 String [] doc_ranks = new String [docs.size()];
168 for (int d = 0; d < docs.size(); d++) {
169 doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_);
170 }
171 return doc_ranks;
172 }
173
174 /** add in term info if available */
175 protected boolean addTermInfo(Element term_list, HashMap params,
176 Object query_result) {
177 String query_level = (String)params.get(LEVEL_PARAM); // the current query level
178
179 Vector terms = ((LuceneQueryResult)query_result).getTerms();
180 for (int t = 0; t < terms.size(); t++) {
181 LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t);
182
183 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
184 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
185 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
186 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
187 term_elem.setAttribute(FIELD_ATT, term_info.field_);
188 term_list.appendChild(term_elem);
189 }
190
191 Vector stopwords = ((LuceneQueryResult)query_result).getStopWords();
192 for (int t = 0; t < stopwords.size(); t++) {
193 String stopword = (String) stopwords.get(t);
194
195 Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM);
196 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
197 term_list.appendChild(stopword_elem);
198 }
199
200 return true;
201 }
202}
Note: See TracBrowser for help on using the repository browser.