source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/LuceneSearch.java@ 6490

Last change on this file since 6490 was 5991, checked in by kjdon, 21 years ago

now returns numDocsMatched

  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1package org.greenstone.gsdl3.service;
2
3// Greenstone classes
4import org.greenstone.gdbm.*;
5import org.greenstone.gsdl3.util.*;
6
7// XML classes
8import org.w3c.dom.Element;
9import org.w3c.dom.Document;
10import org.w3c.dom.NodeList;
11
12import java.util.HashMap;
13
14import org.apache.lucene.analysis.Analyzer;
15import org.apache.lucene.analysis.standard.StandardAnalyzer;
16import org.apache.lucene.document.*; //Document;
17import org.apache.lucene.search.Searcher;
18import org.apache.lucene.search.IndexSearcher;
19import org.apache.lucene.search.Query;
20import org.apache.lucene.search.Hits;
21import org.apache.lucene.queryParser.QueryParser;
22import org.apache.lucene.search.TermQuery;
23import org.apache.lucene.index.Term;
24
25import java.io.File;
26/**
27 *
28 * @author <a href="mailto:[email protected]">Katherine Don</a>
29 * @version $Revision: 5991 $
30 */
31
32public class LuceneSearch
33 extends ServiceRack {
34
35 // the services on offer
36 // these strings must match what is found in the properties file
37 protected static final String TEXT_QUERY_SERVICE = "TextQuery";
38
39 protected static final String QUERY_PARAM = "query";
40
41 public boolean configure(Element info, Element extra_info) {
42
43 System.out.println("Configuring LuceneSearch");
44 Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM);
45 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
46 tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
47 this.short_service_info.appendChild(tq_service);
48
49 // look for format info
50 String path = GSPath.appendLink(GSXML.SEARCH_ELEM, GSXML.FORMAT_ELEM);
51 Element format = (Element) GSXML.getNodeByPath(extra_info, path);
52 if (format != null) {
53 System.out.println("found format :"+this.converter.getString(format));
54 this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(format, true));
55 }
56
57 return true;
58 }
59
60 protected Element getServiceDescription(String service, String lang, String subset) {
61
62 if (!service.equals(TEXT_QUERY_SERVICE)) {
63 return null;
64 }
65 Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM);
66 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
67 tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
68 if (subset==null || subset.equals(GSXML.DISPLAY_TEXT_ELEM)) {
69 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(TEXT_QUERY_SERVICE+".name", lang)));
70 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(TEXT_QUERY_SERVICE+".submit", lang)));
71 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(TEXT_QUERY_SERVICE+".description", lang)));
72 }
73 if (subset==null || subset.equals(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER)) {
74 Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
75 Element param = GSXML.createParameterDescription(this.doc, QUERY_PARAM, getTextString("param."+QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null);
76 param_list.appendChild(param);
77 tq_service.appendChild(param_list);
78 }
79 return tq_service;
80 }
81
82 /** Process a text query - implemented by concrete subclasses */
83 protected Element processTextQuery(Element request) {
84
85 // Create a new (empty) result message
86 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
87 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
88 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
89 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
90 result.appendChild(doc_node_list);
91 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
92 result.appendChild(metadata_list);
93 // Get the parameters of the request
94 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
95 if (param_list == null) {
96 System.err.println("LuceneSearch Error: TextQuery request had no paramList.");
97 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0");
98 return result; // Return the empty result
99 }
100
101 // Process the request parameters
102 HashMap params = GSXML.extractParams(param_list, false);
103
104 // Make sure a query has been specified
105 String query_string = (String) params.get(QUERY_PARAM);
106 if (query_string == null || query_string.equals("")) {
107 System.err.println("LuceneSearch Error: TextQuery request had no query string.");
108 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0");
109 return result; // Return the empty result
110 }
111
112 try {
113 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name);
114 index_dir += File.separator+"idx";
115 Searcher searcher = new IndexSearcher(index_dir);
116 Analyzer analyzer = new StandardAnalyzer();
117
118 Term term = new Term("content", query_string);
119
120 Query query = new TermQuery(term);
121
122 Hits hits = searcher.search(query);
123 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+hits.length());
124
125 for (int i=0; i<hits.length(); i++) {
126 org.apache.lucene.document.Document luc_doc = hits.doc(i);
127 String node_id = luc_doc.get("nodeID");
128 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
129 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
130 doc_node_list.appendChild(node);
131 }
132 } catch (Exception e) {
133 e.printStackTrace();
134 }
135
136 return result;
137
138 }
139
140
141}
Note: See TracBrowser for help on using the repository browser.