- Timestamp:
- 2012-06-06T14:29:29+12:00 (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/LuceneSearch.java
r25635 r25762 5 5 6 6 // XML classes 7 import org.w3c.dom.Element; 7 import org.w3c.dom.Element; 8 8 import org.w3c.dom.Document; 9 import org.w3c.dom.NodeList; 9 import org.w3c.dom.NodeList; 10 10 11 11 import java.util.HashMap; … … 37 37 */ 38 38 39 public class LuceneSearch 40 extends AbstractTextSearch { 41 42 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.LuceneSearch.class.getName()); 43 44 protected static final String INDEX_ELEM = "index"; 45 46 protected ArrayList<String> index_ids; 47 48 public LuceneSearch() 49 { 50 index_ids = new ArrayList<String>(); 51 } 52 53 public boolean configure(Element info, Element extra_info) { 54 if (!super.configure(info, extra_info)){ 55 return false; 56 } 57 58 default_index = "idx"; 59 60 // cache index info read from config file 61 Element index_list 62 = (Element)GSXML.getChildByTagName(this.config_info, 63 INDEX_ELEM+GSXML.LIST_MODIFIER); 64 if (index_list != null) { 65 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM); 66 int len = indexes.getLength(); 67 // now add even if there is only one 68 for (int i=0; i<len; i++) { 69 Element index = (Element)indexes.item(i); 70 index_ids.add(index.getAttribute(GSXML.NAME_ATT)); 71 } 72 } else { 73 // there is only one index, so we assume the default 74 index_ids.add(this.default_index); 75 } 76 77 return true; 78 } 79 80 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang) 81 { 82 // copying exercise for index_ids, 83 for (int i=0; i<this.index_ids.size(); i++) { 84 index_ids.add(this.index_ids.get(i)); 85 } 86 87 // But need to work out display name from scratch as this uses 88 // the 'lang' parameter 89 90 Element index_list 91 = (Element)GSXML.getChildByTagName(this.config_info, 92 INDEX_ELEM+GSXML.LIST_MODIFIER); 93 if (index_list != null) { 94 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM); 95 int len = indexes.getLength(); 96 // now add even if there is only one 97 for (int i=0; i<len; i++) { 98 Element index = (Element)indexes.item(i); 99 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en")); 100 101 } 102 } else { 103 // there is only one index, so we assume the default 104 index_names.add("default index"); 105 } 106 } 107 108 109 protected void initResultElement(Element result, Element doc_node_list, Element metadata_list) 110 { 111 112 // Create a new (empty) result message 113 result.setAttribute(GSXML.FROM_ATT, QUERY_SERVICE); 114 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 115 result.appendChild(doc_node_list); 116 result.appendChild(metadata_list); 117 } 118 119 protected boolean hasParamList(Element request, Element metadata_list) 120 { 121 // Get the parameters of the request 122 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 123 if (param_list == null) { 124 logger.error("TextQuery request had no paramList."); 125 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0"); 126 return false; // signal that an empty result should be return 127 } 128 129 return true; 130 } 131 132 protected boolean hasQueryString(Element param_list, Element metadata_list) 133 { 134 135 // Process the request parameters to make sure a query has been specified 136 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false); 137 String query_string = (String) params.get(QUERY_PARAM); 138 139 if (query_string == null || query_string.equals("")) { 140 logger.error("TextQuery request had no query string."); 141 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0"); 142 return false; // signal that an empty result should be return 143 } 144 145 return true; 146 } 147 148 149 150 /** Process a text query - implemented by concrete subclasses */ 151 protected Element processTextQuery(Element request) { 152 153 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 154 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 155 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); 156 initResultElement(result,doc_node_list,metadata_list); 157 158 if (!hasParamList(request,metadata_list)) { 159 return result; 160 } 161 162 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 163 if (!hasQueryString(param_list,metadata_list)) { 164 return result; 165 } 166 167 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false); 168 String query_string = (String) params.get(QUERY_PARAM); 169 170 // Get the index 171 String index = (String) params.get(INDEX_PARAM); 172 if (index == null || index.equals("")) { 173 index = this.default_index; // assume the default 174 } 175 176 try { 177 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name); 178 index_dir += File.separator+index; 179 Directory index_dir_dir = FSDirectory.open(new File(index_dir)); 180 Searcher searcher = new IndexSearcher(index_dir_dir); 181 Analyzer analyzer = new GS2Analyzer(); 182 183 Term term = new Term("content", query_string); 184 185 Query query = new TermQuery(term); 186 187 TopDocs hits = searcher.search(query, Integer.MAX_VALUE); 188 189 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+hits.scoreDocs.length); 190 191 IndexReader reader = ((IndexSearcher) searcher).getIndexReader(); 192 193 for (int i=0; i<hits.scoreDocs.length; i++) { 194 int lucene_doc_num = hits.scoreDocs[i].doc; 195 org.apache.lucene.document.Document luc_doc = reader.document(lucene_doc_num); 196 String node_id = luc_doc.get("nodeID"); 197 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 198 node.setAttribute(GSXML.NODE_ID_ATT, node_id); 199 doc_node_list.appendChild(node); 200 } 201 } catch (Exception e) { 202 e.printStackTrace(); 203 } 204 205 return result; 206 } 207 208 39 public class LuceneSearch extends AbstractTextSearch 40 { 41 42 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.LuceneSearch.class.getName()); 43 44 protected static final String INDEX_ELEM = "index"; 45 46 protected ArrayList<String> index_ids; 47 48 public LuceneSearch() 49 { 50 index_ids = new ArrayList<String>(); 51 } 52 53 public boolean configure(Element info, Element extra_info) 54 { 55 if (!super.configure(info, extra_info)) 56 { 57 return false; 58 } 59 60 default_index = "idx"; 61 62 // cache index info read from config file 63 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER); 64 if (index_list != null) 65 { 66 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM); 67 int len = indexes.getLength(); 68 // now add even if there is only one 69 for (int i = 0; i < len; i++) 70 { 71 Element index = (Element) indexes.item(i); 72 index_ids.add(index.getAttribute(GSXML.NAME_ATT)); 73 } 74 } 75 else 76 { 77 // there is only one index, so we assume the default 78 index_ids.add(this.default_index); 79 } 80 81 return true; 82 } 83 84 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang) 85 { 86 // copying exercise for index_ids, 87 for (int i = 0; i < this.index_ids.size(); i++) 88 { 89 index_ids.add(this.index_ids.get(i)); 90 } 91 92 // But need to work out display name from scratch as this uses 93 // the 'lang' parameter 94 95 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER); 96 if (index_list != null) 97 { 98 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM); 99 int len = indexes.getLength(); 100 // now add even if there is only one 101 for (int i = 0; i < len; i++) 102 { 103 Element index = (Element) indexes.item(i); 104 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en")); 105 106 } 107 } 108 else 109 { 110 // there is only one index, so we assume the default 111 index_names.add("default index"); 112 } 113 } 114 115 protected void initResultElement(Element result, Element doc_node_list, Element metadata_list) 116 { 117 118 // Create a new (empty) result message 119 result.setAttribute(GSXML.FROM_ATT, QUERY_SERVICE); 120 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 121 result.appendChild(doc_node_list); 122 result.appendChild(metadata_list); 123 } 124 125 protected boolean hasParamList(Element request, Element metadata_list) 126 { 127 // Get the parameters of the request 128 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); 129 if (param_list == null) 130 { 131 logger.error("TextQuery request had no paramList."); 132 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0"); 133 return false; // signal that an empty result should be return 134 } 135 136 return true; 137 } 138 139 protected boolean hasQueryString(Element param_list, Element metadata_list) 140 { 141 142 // Process the request parameters to make sure a query has been specified 143 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false); 144 String query_string = (String) params.get(QUERY_PARAM); 145 146 if (query_string == null || query_string.equals("")) 147 { 148 logger.error("TextQuery request had no query string."); 149 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0"); 150 return false; // signal that an empty result should be return 151 } 152 153 return true; 154 } 155 156 /** Process a text query - implemented by concrete subclasses */ 157 protected Element processTextQuery(Element request) 158 { 159 160 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 161 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER); 162 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER); 163 initResultElement(result, doc_node_list, metadata_list); 164 165 if (!hasParamList(request, metadata_list)) 166 { 167 return result; 168 } 169 170 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); 171 if (!hasQueryString(param_list, metadata_list)) 172 { 173 return result; 174 } 175 176 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false); 177 String query_string = (String) params.get(QUERY_PARAM); 178 179 // Get the index 180 String index = (String) params.get(INDEX_PARAM); 181 if (index == null || index.equals("")) 182 { 183 index = this.default_index; // assume the default 184 } 185 186 try 187 { 188 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name); 189 index_dir += File.separator + index; 190 Directory index_dir_dir = FSDirectory.open(new File(index_dir)); 191 Searcher searcher = new IndexSearcher(index_dir_dir); 192 Analyzer analyzer = new GS2Analyzer(); 193 194 Term term = new Term("content", query_string); 195 196 Query query = new TermQuery(term); 197 198 TopDocs hits = searcher.search(query, Integer.MAX_VALUE); 199 200 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "" + hits.scoreDocs.length); 201 202 IndexReader reader = ((IndexSearcher) searcher).getIndexReader(); 203 204 for (int i = 0; i < hits.scoreDocs.length; i++) 205 { 206 int lucene_doc_num = hits.scoreDocs[i].doc; 207 org.apache.lucene.document.Document luc_doc = reader.document(lucene_doc_num); 208 String node_id = luc_doc.get("nodeID"); 209 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 210 node.setAttribute(GSXML.NODE_ID_ATT, node_id); 211 doc_node_list.appendChild(node); 212 } 213 } 214 catch (Exception e) 215 { 216 e.printStackTrace(); 217 } 218 219 return result; 220 } 221 209 222 }
Note:
See TracChangeset
for help on using the changeset viewer.