source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/IViaSearch.java@ 32419

Last change on this file since 32419 was 29558, checked in by kjdon, 9 years ago

Work around does_paging, does_chunking. Only add in maxdocs, hitsperpage params if the service actually uses them. Lucene/Solr don't use maxdocs any more. I haven't had a chance to clean up the changes, but I need to commit, so there may be extraneous debug statements still here.

  • Property svn:keywords set to Author Date Id Revision
File size: 6.6 KB
RevLine 
[9234]1package org.greenstone.gsdl3.service;
2
3// Greenstone classes
[22085]4import org.greenstone.util.Misc;
[9234]5import org.greenstone.gsdl3.util.*;
6
7// XML classes
8import org.w3c.dom.Element;
9import org.w3c.dom.Document;
10import org.w3c.dom.NodeList;
11
12//Java classes
13import java.util.ArrayList;
14import java.util.HashMap;
15import java.io.File;
16import java.io.BufferedReader;
[25635]17import java.io.Serializable;
[9234]18import java.net.Authenticator;
19
[13124]20import org.apache.log4j.*;
21
[9234]22/**
23 *
[25727]24 * @author Katherine Don
[28966]25 * @author Chi-Yu Huang
[9234]26 */
27
28public class IViaSearch
[24394]29 extends AbstractTextSearch {
[9234]30
[13124]31
[13270]32 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.IViaSearch.class.getName());
[13124]33
[9234]34 // have standard gs param names for hits per page, and start page
35 // these need to be mapped to iVia params
36 protected static final String IM_HITS_PARAM = "no_of_records_per_page";
37 protected static final String IM_START_PAGE_PARAM = "start_page_no";
38
39 protected String ivia_server_url = null;
[25635]40 protected ArrayList<String> index_ids = null;
[9234]41 public IViaSearch()
42 {
[29558]43 does_paging = true;
44 does_multi_index_search = true;
[9234]45 }
46
47 //Configure IViaSearch Service
48 public boolean configure(Element info, Element extra_info)
49 {
[10093]50 if (!super.configure(info, extra_info)){
51 return false;
52 }
53
[9234]54 Element server_elem = (Element)GSXML.getChildByTagName(info, "iViaServer");
55 if (server_elem == null) {
[13124]56 logger.error("no iViaServer element found");
[9234]57 return false;
58 }
59 ivia_server_url = server_elem.getAttribute("url");
60 if (ivia_server_url.equals("")) {
[13124]61 logger.error("no url for the iViaServer element");
[9234]62 return false;
63 }
[9902]64 this.default_index = ",kw,au,su,ti,de,fu,"; // all of them
[25635]65 index_ids = new ArrayList<String>();
[10193]66 index_ids.add("kw");
67 index_ids.add("au");
68 index_ids.add("su");
69 index_ids.add("ti");
70 index_ids.add("de");
71 index_ids.add("fu");
72
[10093]73 return true;
[9234]74 }
75
76 /** Process a text query - implemented by concrete subclasses */
77 protected Element processTextQuery(Element request) {
[28966]78 Document result_doc = XMLConverter.newDOM();
[9234]79 // Create a new (empty) result message
[28966]80 Element result = result_doc.createElement(GSXML.RESPONSE_ELEM);
[24394]81 result.setAttribute(GSXML.FROM_ATT, QUERY_SERVICE);
[9234]82 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
[28966]83 Element doc_node_list = result_doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
[9234]84 result.appendChild(doc_node_list);
85 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
86 if (param_list == null) {
[13124]87 logger.error("TextQuery request had no paramList.");
[9234]88 return result; // Return the empty result
89 }
90
91 // Process the request parameters
[25635]92 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
[9234]93
94 // Make sure a query has been specified
95 String query = (String) params.get(QUERY_PARAM);
96 if (query == null || query.equals("")) {
97 return result; // Return the empty result
98 }
99
100 // tidy whitespace
101 query = query.replaceAll("\\s+", "+");
102 String url_string = ivia_server_url+"/cgi-bin/canned_search?theme=gsdl3&query="+query;
103
104 // check for fields
105 String fields = (String) params.get(INDEX_PARAM);
[10193]106 fields = checkFieldParam(fields); // removes invalid fields
107 if (!fields.equals("")) {
[9234]108 url_string += "&fields="+fields;
109 }
110 //check for hits per page
111 String hits_per_page = (String) params.get(HITS_PER_PAGE_PARAM);
112 if (hits_per_page != null && !hits_per_page.equals("")) {
113 url_string += "&"+IM_HITS_PARAM+"="+hits_per_page;
114 }
115
116 // check for start page
117 String start_page = (String) params.get(START_PAGE_PARAM);
118 if (start_page != null && !start_page.equals("")) {
119 url_string += "&"+IM_START_PAGE_PARAM+"="+start_page;
120 }
121 String results_num = null;
122 String doc_ids = null;
[9874]123 BufferedReader reader = null;
[9234]124 try {
[13124]125 logger.debug("sending "+url_string);
[9874]126 reader = Misc.makeHttpConnection(url_string);
[9234]127 results_num = reader.readLine();
128 doc_ids = reader.readLine();
[9874]129 } catch (java.net.MalformedURLException e) {
[28966]130 GSXML.addError(result, "Malformed URL: "+url_string);
[9234]131 return result;
[9874]132 } catch (java.io.IOException e) {
[28966]133 GSXML.addError(result, "IOException during connection to "+url_string+": "+e.toString());
[9874]134 return result;
[9234]135 }
136
[10194]137 if (results_num.startsWith("Resources: ") && doc_ids.startsWith("Ids: ")) {
[9234]138 results_num = results_num.substring(11);
139 doc_ids = doc_ids.substring(5).trim();
[10194]140
[9234]141 } else {
[13124]142 logger.error("badly formatted results:");
[10194]143 StringBuffer result_string = new StringBuffer();
144 result_string.append("Error: badly formatted result from IVia server:\n ");
145 result_string.append(results_num);
146 result_string.append(doc_ids);
147 String line;
148 try {
149 while((line = reader.readLine()) != null) {
150 result_string.append(line);
151 }
152 } catch (Exception e) {
153 result_string.append("Exception: "+e);
154 }
[28966]155 GSXML.addError(result, result_string.toString());
[10194]156
[9234]157 return result;
158 }
159
160 // get the num docs and add to a metadata list
[28966]161 Element metadata_list = result_doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
[9234]162 result.appendChild(metadata_list);
163
164 // Add a metadata element specifying the number of matching documents
165 long numdocs = Long.parseLong(results_num);
[28966]166 GSXML.addMetadata(metadata_list, "numDocsMatched", ""+numdocs);
[9234]167 String [] ids = doc_ids.split(" ");
168
169 for (int d=0; d<ids.length; d++) {
[28966]170 Element doc_node = result_doc.createElement(GSXML.DOC_NODE_ELEM);
[9234]171 doc_node.setAttribute(GSXML.NODE_ID_ATT, ids[d]);
172 doc_node_list.appendChild(doc_node);
173 }
174 return result;
175 }
176
[10193]177 protected String checkFieldParam(String fields) {
178
179 if (fields == null) {
180 // return the default
181 return "";
182 }
183 StringBuffer new_fields = new StringBuffer();
184 String [] ids = fields.split(",");
185 for (int i=0; i<ids.length; i++) {
186 if(index_ids.contains(ids[i])) {
187 new_fields.append(ids[i]);
188 new_fields.append(",");
189 }
190 }
191 if (new_fields.length() == 0) {
192 return "";
193 }
194 return new_fields.toString();
195 }
[9273]196 /**
197 An IVia server has a fixed list of fields to search (I think) so we can hard code them here rather than reading them in from a config file
198 */
[25635]199 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names,String lang){
[10193]200 index_ids.addAll(this.index_ids);
[9273]201 index_names.add(getTextString("param."+INDEX_PARAM+".kw", lang));
202 index_names.add(getTextString("param."+INDEX_PARAM+".au", lang));
203 index_names.add(getTextString("param."+INDEX_PARAM+".su", lang));
204 index_names.add(getTextString("param."+INDEX_PARAM+".ti", lang));
205 index_names.add(getTextString("param."+INDEX_PARAM+".de", lang));
206 index_names.add(getTextString("param."+INDEX_PARAM+".fu", lang));
[9234]207 }
208
209}
Note: See TracBrowser for help on using the repository browser.