source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/LuceneSearch.java@ 30616

Last change on this file since 30616 was 30474, checked in by kjdon, 8 years ago

added some info

  • Property svn:keywords set to Author Date Id Revision
File size: 6.3 KB
RevLine 
[30474]1/** This is a simple Lucene XML Search class. This is NOT the one
2 used by Greenstone Lucene collections. This is used by the custom
3 gberg collection */
4
[5257]5package org.greenstone.gsdl3.service;
6
7// Greenstone classes
[25763]8import java.io.File;
9import java.io.Serializable;
10import java.util.ArrayList;
[5257]11import java.util.HashMap;
12
[25763]13import org.apache.log4j.Logger;
[29143]14import org.apache.lucene.index.DirectoryReader;
[25763]15import org.apache.lucene.index.IndexReader;
16import org.apache.lucene.index.Term;
[5257]17import org.apache.lucene.search.IndexSearcher;
18import org.apache.lucene.search.Query;
19import org.apache.lucene.search.TermQuery;
[24724]20import org.apache.lucene.search.TopDocs;
[25763]21import org.apache.lucene.store.Directory;
[24724]22import org.apache.lucene.store.FSDirectory;
[25763]23import org.greenstone.gsdl3.util.GSFile;
24import org.greenstone.gsdl3.util.GSXML;
[28966]25import org.greenstone.gsdl3.util.XMLConverter;
26
27import org.w3c.dom.Document;
[25763]28import org.w3c.dom.Element;
29import org.w3c.dom.NodeList;
[5257]30
[25762]31public class LuceneSearch extends AbstractTextSearch
32{
33 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.LuceneSearch.class.getName());
[13124]34
[25762]35 protected static final String INDEX_ELEM = "index";
[24722]36
[25762]37 protected ArrayList<String> index_ids;
[24722]38
[25762]39 public LuceneSearch()
40 {
41 index_ids = new ArrayList<String>();
[13927]42 }
[24722]43
[25762]44 public boolean configure(Element info, Element extra_info)
45 {
46 if (!super.configure(info, extra_info))
47 {
48 return false;
49 }
[24722]50
[25762]51 default_index = "idx";
52
53 // cache index info read from config file
54 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
55 if (index_list != null)
56 {
57 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
58 int len = indexes.getLength();
59 // now add even if there is only one
60 for (int i = 0; i < len; i++)
61 {
62 Element index = (Element) indexes.item(i);
63 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
64 }
65 }
66 else
67 {
68 // there is only one index, so we assume the default
69 index_ids.add(this.default_index);
70 }
71
72 return true;
[24722]73 }
74
[25762]75 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
76 {
77 // copying exercise for index_ids,
78 for (int i = 0; i < this.index_ids.size(); i++)
79 {
80 index_ids.add(this.index_ids.get(i));
81 }
[24722]82
[25762]83 // But need to work out display name from scratch as this uses
84 // the 'lang' parameter
85
86 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
87 if (index_list != null)
88 {
89 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
90 int len = indexes.getLength();
91 // now add even if there is only one
92 for (int i = 0; i < len; i++)
93 {
94 Element index = (Element) indexes.item(i);
95 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
96
97 }
98 }
99 else
100 {
101 // there is only one index, so we assume the default
102 index_names.add("default index");
103 }
[5963]104 }
[5257]105
[25762]106 protected void initResultElement(Element result, Element doc_node_list, Element metadata_list)
107 {
108 // Create a new (empty) result message
109 result.setAttribute(GSXML.FROM_ATT, QUERY_SERVICE);
110 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
111 result.appendChild(doc_node_list);
112 result.appendChild(metadata_list);
113 }
[24722]114
[25762]115 protected boolean hasParamList(Element request, Element metadata_list)
116 {
117 // Get the parameters of the request
118 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
119 if (param_list == null)
120 {
121 logger.error("TextQuery request had no paramList.");
[28966]122 GSXML.addMetadata(metadata_list, "numDocsMatched", "0");
[25762]123 return false; // signal that an empty result should be return
124 }
[24722]125
[25762]126 return true;
[5257]127 }
128
[25762]129 protected boolean hasQueryString(Element param_list, Element metadata_list)
130 {
131 // Process the request parameters to make sure a query has been specified
132 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
133 String query_string = (String) params.get(QUERY_PARAM);
[24722]134
[25762]135 if (query_string == null || query_string.equals(""))
136 {
137 logger.error("TextQuery request had no query string.");
[28966]138 GSXML.addMetadata(metadata_list, "numDocsMatched", "0");
[25762]139 return false; // signal that an empty result should be return
140 }
[5257]141
[25762]142 return true;
[5257]143 }
144
[25762]145 /** Process a text query - implemented by concrete subclasses */
146 protected Element processTextQuery(Element request)
147 {
[28966]148 Document result_doc = XMLConverter.newDOM();
149 Element result = result_doc.createElement(GSXML.RESPONSE_ELEM);
150 Element doc_node_list = result_doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
151 Element metadata_list = result_doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
[25762]152 initResultElement(result, doc_node_list, metadata_list);
[24722]153
[25762]154 if (!hasParamList(request, metadata_list))
155 {
156 return result;
157 }
[24722]158
[25762]159 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
160 if (!hasQueryString(param_list, metadata_list))
161 {
162 return result;
163 }
[24722]164
[25762]165 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
166 String query_string = (String) params.get(QUERY_PARAM);
[24722]167
[25762]168 // Get the index
169 String index = (String) params.get(INDEX_PARAM);
170 if (index == null || index.equals(""))
171 {
172 index = this.default_index; // assume the default
173 }
[24722]174
[25762]175 try
176 {
177 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name);
178 index_dir += File.separator + index;
179 Directory index_dir_dir = FSDirectory.open(new File(index_dir));
[29143]180 IndexReader reader = DirectoryReader.open(index_dir_dir); //deprecated: IndexReader.open(index_dir_dir);
181 IndexSearcher searcher = new IndexSearcher(reader);
[24722]182
[25762]183 Term term = new Term("content", query_string);
[24722]184
[25762]185 Query query = new TermQuery(term);
[24724]186
[25762]187 TopDocs hits = searcher.search(query, Integer.MAX_VALUE);
[24724]188
[28966]189 GSXML.addMetadata(metadata_list, "numDocsMatched", "" + hits.scoreDocs.length);
[25762]190
191 for (int i = 0; i < hits.scoreDocs.length; i++)
192 {
193 int lucene_doc_num = hits.scoreDocs[i].doc;
194 org.apache.lucene.document.Document luc_doc = reader.document(lucene_doc_num);
195 String node_id = luc_doc.get("nodeID");
[28966]196 Element node = result_doc.createElement(GSXML.DOC_NODE_ELEM);
[25762]197 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
198 doc_node_list.appendChild(node);
199 }
200 }
201 catch (Exception e)
202 {
203 e.printStackTrace();
204 }
205
206 return result;
[5257]207 }
208}
Note: See TracBrowser for help on using the repository browser.