source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/LuceneSearch.java@ 24738

Last change on this file since 24738 was 24738, checked in by davidb, 13 years ago

Shifted over to using LuceneWrapper3

  • Property svn:keywords set to Author Date Id Revision
File size: 6.3 KB
RevLine 
[5257]1package org.greenstone.gsdl3.service;
2
3// Greenstone classes
4import org.greenstone.gsdl3.util.*;
5
6// XML classes
7import org.w3c.dom.Element;
8import org.w3c.dom.Document;
9import org.w3c.dom.NodeList;
10
11import java.util.HashMap;
[9272]12import java.util.ArrayList;
[5257]13
14import org.apache.lucene.analysis.Analyzer;
15import org.apache.lucene.analysis.standard.StandardAnalyzer;
16import org.apache.lucene.document.*; //Document;
17import org.apache.lucene.search.Searcher;
18import org.apache.lucene.search.IndexSearcher;
[24724]19import org.apache.lucene.index.IndexReader;
[5257]20import org.apache.lucene.search.Query;
21import org.apache.lucene.queryParser.QueryParser;
[24724]22//import org.apache.lucene.search.Hits;
[5257]23import org.apache.lucene.search.TermQuery;
[24724]24import org.apache.lucene.search.TopDocs;
[5257]25import org.apache.lucene.index.Term;
[24724]26import org.apache.lucene.store.FSDirectory;
27import org.apache.lucene.store.Directory;
[5257]28
[24738]29import org.greenstone.LuceneWrapper3.GS2Analyzer;
[5257]30import java.io.File;
[13124]31
32import org.apache.log4j.*;
33
[5257]34/**
35 *
36 */
37
38public class LuceneSearch
[24394]39 extends AbstractTextSearch {
[5257]40
[13270]41 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.LuceneSearch.class.getName());
[13124]42
[9272]43 protected static final String INDEX_ELEM = "index";
[24722]44
45 protected ArrayList index_ids;
46
47 public LuceneSearch()
48 {
49 index_ids = new ArrayList();
50 }
[5257]51
[13927]52 public boolean configure(Element info, Element extra_info) {
53 if (!super.configure(info, extra_info)){
54 return false;
55 }
56
57 default_index = "idx";
[24722]58
59 // cache index info read from config file
60 Element index_list
61 = (Element)GSXML.getChildByTagName(this.config_info,
62 INDEX_ELEM+GSXML.LIST_MODIFIER);
63 if (index_list != null) {
64 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
65 int len = indexes.getLength();
66 // now add even if there is only one
67 for (int i=0; i<len; i++) {
68 Element index = (Element)indexes.item(i);
69 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
70 }
71 } else {
72 // there is only one index, so we assume the default
73 index_ids.add(this.default_index);
74 }
75
[13927]76 return true;
77 }
78
[9272]79 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang)
80 {
[24722]81 // copying exercise for index_ids,
82 for (int i=0; i<this.index_ids.size(); i++) {
83 index_ids.add(this.index_ids.get(i));
84 }
85
86 // But need to work out display name from scratch as this uses
87 // the 'lang' parameter
88
89 Element index_list
90 = (Element)GSXML.getChildByTagName(this.config_info,
91 INDEX_ELEM+GSXML.LIST_MODIFIER);
[9272]92 if (index_list != null) {
93 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
94 int len = indexes.getLength();
95 // now add even if there is only one
96 for (int i=0; i<len; i++) {
97 Element index = (Element)indexes.item(i);
98 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
99
100 }
101 } else {
102 // there is only one index, so we assume the default
[20086]103 index_names.add("default index");
[5963]104 }
[5257]105 }
106
107
[24722]108 protected void initResultElement(Element result, Element doc_node_list, Element metadata_list)
109 {
110
[5257]111 // Create a new (empty) result message
[24394]112 result.setAttribute(GSXML.FROM_ATT, QUERY_SERVICE);
[5257]113 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
114 result.appendChild(doc_node_list);
[5991]115 result.appendChild(metadata_list);
[24722]116 }
117
118 protected boolean hasParamList(Element request, Element metadata_list)
119 {
[5257]120 // Get the parameters of the request
121 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
122 if (param_list == null) {
[13124]123 logger.error("TextQuery request had no paramList.");
[5991]124 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0");
[24722]125 return false; // signal that an empty result should be return
[5257]126 }
127
[24722]128 return true;
129 }
130
131 protected boolean hasQueryString(Element param_list, Element metadata_list)
132 {
133
134 // Process the request parameters to make sure a query has been specified
[5257]135 HashMap params = GSXML.extractParams(param_list, false);
[24722]136 String query_string = (String) params.get(QUERY_PARAM);
[5257]137
138 if (query_string == null || query_string.equals("")) {
[13124]139 logger.error("TextQuery request had no query string.");
[5991]140 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0");
[24722]141 return false; // signal that an empty result should be return
[5257]142 }
143
[24722]144 return true;
145 }
146
147
148
149 /** Process a text query - implemented by concrete subclasses */
150 protected Element processTextQuery(Element request) {
151
152 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
153 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
154 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
155 initResultElement(result,doc_node_list,metadata_list);
156
157 if (!hasParamList(request,metadata_list)) {
158 return result;
159 }
160
161 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
162 if (!hasQueryString(param_list,metadata_list)) {
163 return result;
164 }
165
166 HashMap params = GSXML.extractParams(param_list, false);
167 String query_string = (String) params.get(QUERY_PARAM);
168
[9272]169 // Get the index
170 String index = (String) params.get(INDEX_PARAM);
[13927]171 if (index == null || index.equals("")) {
172 index = this.default_index; // assume the default
173 }
[24722]174
175 try {
[5257]176 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name);
[9272]177 index_dir += File.separator+index;
[24724]178 Directory index_dir_dir = FSDirectory.open(new File(index_dir));
179 Searcher searcher = new IndexSearcher(index_dir_dir);
180 Analyzer analyzer = new GS2Analyzer();
[5257]181
182 Term term = new Term("content", query_string);
[24722]183
[5257]184 Query query = new TermQuery(term);
[24722]185
[24724]186 TopDocs hits = searcher.search(query, Integer.MAX_VALUE);
187
188 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+hits.scoreDocs.length);
[5991]189
[24724]190 IndexReader reader = ((IndexSearcher) searcher).getIndexReader();
191
192 for (int i=0; i<hits.scoreDocs.length; i++) {
193 int lucene_doc_num = hits.scoreDocs[i].doc;
194 org.apache.lucene.document.Document luc_doc = reader.document(lucene_doc_num);
[5257]195 String node_id = luc_doc.get("nodeID");
196 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
197 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
198 doc_node_list.appendChild(node);
199 }
200 } catch (Exception e) {
201 e.printStackTrace();
202 }
[24722]203
204 return result;
[5257]205 }
206
[5963]207
[5257]208}
Note: See TracBrowser for help on using the repository browser.