Changeset 25978
- Timestamp:
- 2012-07-19T14:29:07+12:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/IViaProxy.java
r25727 r25978 2 2 3 3 // Greenstone classes 4 import org.greenstone.gsdl3.util.*; 5 6 // XML classes 7 import org.w3c.dom.Element; 8 import org.w3c.dom.Document; 9 import org.w3c.dom.NodeList; 10 11 import java.util.HashMap; 12 import java.io.File; 4 import java.io.BufferedReader; 13 5 import java.io.InputStream; 14 import java.io.BufferedReader;15 6 import java.io.InputStreamReader; 16 import java.io.IOException;17 7 import java.io.Serializable; 18 8 import java.net.HttpURLConnection; 19 import java.net.URLConnection;20 9 import java.net.URL; 21 import java.net.Authenticator; 22 import java.net.MalformedURLException; 23 24 import org.apache.log4j.*; 10 import java.util.HashMap; 11 12 import org.apache.log4j.Logger; 13 import org.greenstone.gsdl3.util.GSPath; 14 import org.greenstone.gsdl3.util.GSXML; 15 import org.w3c.dom.Document; 16 import org.w3c.dom.Element; 17 import org.w3c.dom.NodeList; 25 18 26 19 /** 27 * 20 * 28 21 * @author Katherine Don 29 22 * @version $Revision$ 30 23 */ 31 24 32 public class IViaProxy 33 extends ServiceRack { 34 35 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.IViaProxy.class.getName()); 36 37 // the services on offer 38 // these strings must match what is found in the properties file 39 protected static final String TEXT_QUERY_SERVICE = "TextQuery"; 40 protected static final String DOC_CONTENT_SERVICE = "DocumentContentRetrieve"; 41 protected static final String DOC_META_SERVICE = "DocumentMetadataRetrieve"; 42 protected static final String QUERY_PARAM = "query"; 43 protected static final String FIELD_PARAM = "fields"; 44 // have standard gs param names for hits per page, and start page 45 // these need to be mapped to iVia params 46 protected static final String GS_HITS_PARAM = "hitsPerPage"; 47 protected static final String IM_HITS_PARAM = "no_of_records_per_page"; 48 protected static final String GS_START_PAGE_PARAM = "startPage"; 49 protected static final String IM_START_PAGE_PARAM = "start_page_no"; 50 51 protected String ivia_server_url = null; 52 53 public boolean configure(Element info, Element extra_info) { 54 55 if (!super.configure(info, extra_info)){ 56 return false; 57 } 58 59 Element server_elem = (Element)GSXML.getChildByTagName(info, "iViaServer"); 60 if (server_elem == null) { 61 logger.error("no iViaServer element found"); 62 return false; 63 } 64 ivia_server_url = server_elem.getAttribute("url"); 65 if (ivia_server_url.equals("")) { 66 logger.error("no url for the iViaServer element"); 67 return false; 68 } 69 Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM); 70 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); 71 tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE); 72 this.short_service_info.appendChild(tq_service); 73 74 Element dc_service = this.doc.createElement(GSXML.SERVICE_ELEM); 75 dc_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); 76 dc_service.setAttribute(GSXML.NAME_ATT, DOC_CONTENT_SERVICE); 77 this.short_service_info.appendChild(dc_service); 78 79 Element dm_service = this.doc.createElement(GSXML.SERVICE_ELEM); 80 dm_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); 81 dm_service.setAttribute(GSXML.NAME_ATT, DOC_META_SERVICE); 82 this.short_service_info.appendChild(dm_service); 83 84 // 85 // add some format info to service map if there is any 86 String path = GSPath.appendLink(GSXML.SEARCH_ELEM, GSXML.FORMAT_ELEM); 87 Element format = (Element) GSXML.getNodeByPath(extra_info, path); 88 if (format != null) { 89 this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(format, true)); 90 } 91 92 93 // look for document display format 94 path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM); 95 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path); 96 if (display_format != null) { 97 this.format_info_map.put(DOC_CONTENT_SERVICE, this.doc.importNode(display_format, true)); 98 // shoudl we make a copy? 99 } 100 101 return true; 102 103 } 104 105 protected Element getServiceDescription(String service, String lang, String subset) { 106 107 if (service.equals(TEXT_QUERY_SERVICE)) { 108 Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM); 109 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); 110 tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE); 111 if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER)) { 112 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(TEXT_QUERY_SERVICE+".name", lang))); 113 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(TEXT_QUERY_SERVICE+".submit", lang))); 114 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(TEXT_QUERY_SERVICE+".description", lang))); 115 } 116 if (subset == null || subset.equals(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER)) { 117 Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 118 tq_service.appendChild(param_list); 119 Element param = GSXML.createParameterDescription(this.doc, QUERY_PARAM, getTextString("param."+QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null); 120 param_list.appendChild(param); 121 String [] field_ids = {"kw", "au", "su", "ti", "de", "fu"}; 122 String [] field_names = { 123 getTextString("param."+FIELD_PARAM+".kw", lang), 124 getTextString("param."+FIELD_PARAM+".au", lang), 125 getTextString("param."+FIELD_PARAM+".su", lang), 126 getTextString("param."+FIELD_PARAM+".ti", lang), 127 getTextString("param."+FIELD_PARAM+".de", lang), 128 getTextString("param."+FIELD_PARAM+".fu", lang) }; 129 130 param = GSXML.createParameterDescription(this.doc, FIELD_PARAM, getTextString("param."+FIELD_PARAM, lang), GSXML.PARAM_TYPE_ENUM_MULTI, "kw,au,su,ti,de,fu", field_ids, field_names); 131 param_list.appendChild(param); 132 133 134 String [] hits_options = {"10", "30", "50"}; 135 param = GSXML.createParameterDescription(this.doc, GS_HITS_PARAM, getTextString("param."+GS_HITS_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, "10", hits_options, hits_options); 136 param_list.appendChild(param); 137 138 param = GSXML.createParameterDescription(this.doc, GS_START_PAGE_PARAM, "", GSXML.PARAM_TYPE_INVISIBLE, "1", null, null); 139 param_list.appendChild(param); 140 } 141 return tq_service; 142 } 143 if (service.equals(DOC_META_SERVICE)) { 144 Element dm_service = this.doc.createElement(GSXML.SERVICE_ELEM); 145 dm_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); 146 dm_service.setAttribute(GSXML.NAME_ATT, DOC_META_SERVICE); 147 return dm_service; 148 149 } 150 if (service.equals(DOC_CONTENT_SERVICE)) { 151 Element dc_service = this.doc.createElement(GSXML.SERVICE_ELEM); 152 dc_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); 153 dc_service.setAttribute(GSXML.NAME_ATT, DOC_CONTENT_SERVICE); 154 return dc_service; 155 156 157 } 158 return null; 159 } 160 161 /** Process a text query - implemented by concrete subclasses */ 162 protected Element processTextQuery(Element request) { 163 164 // Create a new (empty) result message 165 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 166 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE); 167 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 168 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 169 result.appendChild(doc_node_list); 170 171 172 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 173 if (param_list == null) { 174 logger.error("TextQuery request had no paramList."); 175 return result; // Return the empty result 176 } 177 178 // Process the request parameters 179 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false); 180 181 // Make sure a query has been specified 182 String query = (String) params.get(QUERY_PARAM); 183 if (query == null || query.equals("")) { 184 return result; // Return the empty result 185 } 186 // tidy whitespace 187 query = query.replaceAll("\\s+", "+"); 188 String url_string = ivia_server_url+"/cgi-bin/canned_search?theme=gsdl3&query="+query; 189 190 // check for fields 191 String fields = (String) params.get(FIELD_PARAM); 192 if (fields != null && !fields.equals("")) { 193 url_string += "&fields="+fields; 194 } 195 196 //check for hits per page 197 String hits_per_page = (String) params.get(GS_HITS_PARAM); 198 if (hits_per_page != null && !hits_per_page.equals("")) { 199 url_string += "&"+IM_HITS_PARAM+"="+hits_per_page; 200 } 201 202 // check for start page 203 String start_page = (String) params.get(GS_START_PAGE_PARAM); 204 if (start_page != null && !start_page.equals("")) { 205 url_string += "&"+IM_START_PAGE_PARAM+"="+start_page; 206 } 207 String results_num = null; 208 String doc_ids = null; 209 try { 210 logger.debug("IViaProxy, sending "+url_string); 211 BufferedReader reader = makeConnection(url_string); 212 results_num = reader.readLine(); 213 doc_ids = reader.readLine(); 214 215 } catch (Exception e) { 216 logger.error("exception happened during query"); 217 e.printStackTrace(); 218 return result; 219 } 220 221 if (results_num.startsWith("Resources: ")) { 222 results_num = results_num.substring(11); 223 } else { 224 logger.error("badly formatted results line: "+results_num); 225 return result; 226 } 227 if (doc_ids.startsWith("Ids: ")) { 228 doc_ids = doc_ids.substring(5).trim(); 229 } else { 230 logger.error("badly formatted docs line: "+doc_ids); 231 return result; 232 } 233 234 // get the num docs and add to a metadata list 235 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); 236 result.appendChild(metadata_list); 237 238 // Add a metadata element specifying the number of matching documents 239 long numdocs = Long.parseLong(results_num); 240 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+numdocs); 241 String [] ids = doc_ids.split(" "); 242 243 for (int d=0; d<ids.length; d++) { 244 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 245 doc_node.setAttribute(GSXML.NODE_ID_ATT, ids[d]); 246 doc_node_list.appendChild(doc_node); 247 } 248 logger.debug("IViaProxy result:"); 249 logger.debug(this.converter.getString(result)); 250 return result; 251 252 } 253 254 protected Element processDocumentMetadataRetrieve(Element request) { 255 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 256 result.setAttribute(GSXML.FROM_ATT, DOC_META_SERVICE); 257 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 258 259 // Get the parameters of the request 260 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 261 if (param_list == null) { 262 logger.error("missing paramList.\n"); 263 return result; // Return the empty result 264 } 265 266 // The metadata information required 267 StringBuffer field_list = new StringBuffer(); 268 Element param = GSXML.getFirstElementChild(param_list);//(Element) param_list.getFirstChild(); 269 while (param != null) { 270 // Identify the metadata information desired 271 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) { 272 String metadata = GSXML.getValue(param); 273 if (isAcceptableMetadata(metadata)) { 274 field_list.append(metadata); 275 field_list.append(","); 276 } 277 } 278 param = (Element) param.getNextSibling(); 279 } 280 281 if (field_list.length()==0) { 282 logger.error("no metadata specified.\n"); 283 return result; 284 } 285 286 // Get the documents 287 Element request_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 288 if (request_node_list == null) { 289 logger.error("DocumentMetadataRetrieve request had no "+GSXML.DOC_NODE_ELEM+"List.\n"); 290 return result; 291 } 292 293 StringBuffer record_id_list = new StringBuffer(); 294 295 NodeList request_nodes = request_node_list.getChildNodes(); 296 for (int i = 0; i < request_nodes.getLength(); i++) { 297 Element request_node = (Element) request_nodes.item(i); 298 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT); 299 record_id_list.append(node_id); 300 record_id_list.append(","); 301 } 302 303 // do the query to the iVia server 304 String url_string = ivia_server_url+"/cgi-bin/view_record_set?theme=gsdl3&record_id_list="+record_id_list.toString()+"&field_list="+field_list.toString(); 305 306 Element node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 307 result.appendChild(node_list); 308 try { 309 BufferedReader reader = makeConnection(url_string); 310 String line; 311 while ((line = reader.readLine()) != null) { 312 if (!line.startsWith("Record:")) { 313 continue; 314 } 315 // the first line is the record 316 line=line.substring(8); 25 public class IViaProxy extends ServiceRack 26 { 27 28 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.IViaProxy.class.getName()); 29 30 // the services on offer 31 // these strings must match what is found in the properties file 32 protected static final String TEXT_QUERY_SERVICE = "TextQuery"; 33 protected static final String DOC_CONTENT_SERVICE = "DocumentContentRetrieve"; 34 protected static final String DOC_META_SERVICE = "DocumentMetadataRetrieve"; 35 protected static final String QUERY_PARAM = "query"; 36 protected static final String FIELD_PARAM = "fields"; 37 // have standard gs param names for hits per page, and start page 38 // these need to be mapped to iVia params 39 protected static final String GS_HITS_PARAM = "hitsPerPage"; 40 protected static final String IM_HITS_PARAM = "no_of_records_per_page"; 41 protected static final String GS_START_PAGE_PARAM = "startPage"; 42 protected static final String IM_START_PAGE_PARAM = "start_page_no"; 43 44 protected String ivia_server_url = null; 45 46 public boolean configure(Element info, Element extra_info) 47 { 48 49 if (!super.configure(info, extra_info)) 50 { 51 return false; 52 } 53 54 Element server_elem = (Element) GSXML.getChildByTagName(info, "iViaServer"); 55 if (server_elem == null) 56 { 57 logger.error("no iViaServer element found"); 58 return false; 59 } 60 ivia_server_url = server_elem.getAttribute("url"); 61 if (ivia_server_url.equals("")) 62 { 63 logger.error("no url for the iViaServer element"); 64 return false; 65 } 66 Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM); 67 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); 68 tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE); 69 this.short_service_info.appendChild(tq_service); 70 71 Element dc_service = this.doc.createElement(GSXML.SERVICE_ELEM); 72 dc_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); 73 dc_service.setAttribute(GSXML.NAME_ATT, DOC_CONTENT_SERVICE); 74 this.short_service_info.appendChild(dc_service); 75 76 Element dm_service = this.doc.createElement(GSXML.SERVICE_ELEM); 77 dm_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); 78 dm_service.setAttribute(GSXML.NAME_ATT, DOC_META_SERVICE); 79 this.short_service_info.appendChild(dm_service); 80 81 // 82 // add some format info to service map if there is any 83 String path = GSPath.appendLink(GSXML.SEARCH_ELEM, GSXML.FORMAT_ELEM); 84 Element format = (Element) GSXML.getNodeByPath(extra_info, path); 85 if (format != null) 86 { 87 this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(format, true)); 88 } 89 90 // look for document display format 91 path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM); 92 Element display_format = (Element) GSXML.getNodeByPath(extra_info, path); 93 if (display_format != null) 94 { 95 this.format_info_map.put(DOC_CONTENT_SERVICE, this.doc.importNode(display_format, true)); 96 // shoudl we make a copy? 97 } 98 99 return true; 100 101 } 102 103 protected Element getServiceDescription(String service, String lang, String subset) 104 { 105 106 if (service.equals(TEXT_QUERY_SERVICE)) 107 { 108 Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM); 109 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); 110 tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE); 111 if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER)) 112 { 113 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(TEXT_QUERY_SERVICE + ".name", lang))); 114 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(TEXT_QUERY_SERVICE + ".submit", lang))); 115 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(TEXT_QUERY_SERVICE + ".description", lang))); 116 } 117 if (subset == null || subset.equals(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER)) 118 { 119 Element param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); 120 tq_service.appendChild(param_list); 121 Element param = GSXML.createParameterDescription(this.doc, QUERY_PARAM, getTextString("param." + QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null); 122 param_list.appendChild(param); 123 String[] field_ids = { "kw", "au", "su", "ti", "de", "fu" }; 124 String[] field_names = { getTextString("param." + FIELD_PARAM + ".kw", lang), getTextString("param." + FIELD_PARAM + ".au", lang), getTextString("param." + FIELD_PARAM + ".su", lang), getTextString("param." + FIELD_PARAM + ".ti", lang), getTextString("param." + FIELD_PARAM + ".de", lang), getTextString("param." + FIELD_PARAM + ".fu", lang) }; 125 126 param = GSXML.createParameterDescription(this.doc, FIELD_PARAM, getTextString("param." + FIELD_PARAM, lang), GSXML.PARAM_TYPE_ENUM_MULTI, "kw,au,su,ti,de,fu", field_ids, field_names); 127 param_list.appendChild(param); 128 129 String[] hits_options = { "10", "30", "50" }; 130 param = GSXML.createParameterDescription(this.doc, GS_HITS_PARAM, getTextString("param." + GS_HITS_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, "10", hits_options, hits_options); 131 param_list.appendChild(param); 132 133 param = GSXML.createParameterDescription(this.doc, GS_START_PAGE_PARAM, "", GSXML.PARAM_TYPE_INVISIBLE, "1", null, null); 134 param_list.appendChild(param); 135 } 136 return tq_service; 137 } 138 if (service.equals(DOC_META_SERVICE)) 139 { 140 Element dm_service = this.doc.createElement(GSXML.SERVICE_ELEM); 141 dm_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); 142 dm_service.setAttribute(GSXML.NAME_ATT, DOC_META_SERVICE); 143 return dm_service; 144 145 } 146 if (service.equals(DOC_CONTENT_SERVICE)) 147 { 148 Element dc_service = this.doc.createElement(GSXML.SERVICE_ELEM); 149 dc_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); 150 dc_service.setAttribute(GSXML.NAME_ATT, DOC_CONTENT_SERVICE); 151 return dc_service; 152 153 } 154 return null; 155 } 156 157 /** Process a text query - implemented by concrete subclasses */ 158 protected Element processTextQuery(Element request) 159 { 160 161 // Create a new (empty) result message 162 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 163 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE); 164 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 165 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER); 166 result.appendChild(doc_node_list); 167 168 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); 169 if (param_list == null) 170 { 171 logger.error("TextQuery request had no paramList."); 172 return result; // Return the empty result 173 } 174 175 // Process the request parameters 176 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false); 177 178 // Make sure a query has been specified 179 String query = (String) params.get(QUERY_PARAM); 180 if (query == null || query.equals("")) 181 { 182 return result; // Return the empty result 183 } 184 // tidy whitespace 185 query = query.replaceAll("\\s+", "+"); 186 String url_string = ivia_server_url + "/cgi-bin/canned_search?theme=gsdl3&query=" + query; 187 188 // check for fields 189 String fields = (String) params.get(FIELD_PARAM); 190 if (fields != null && !fields.equals("")) 191 { 192 url_string += "&fields=" + fields; 193 } 194 195 //check for hits per page 196 String hits_per_page = (String) params.get(GS_HITS_PARAM); 197 if (hits_per_page != null && !hits_per_page.equals("")) 198 { 199 url_string += "&" + IM_HITS_PARAM + "=" + hits_per_page; 200 } 201 202 // check for start page 203 String start_page = (String) params.get(GS_START_PAGE_PARAM); 204 if (start_page != null && !start_page.equals("")) 205 { 206 url_string += "&" + IM_START_PAGE_PARAM + "=" + start_page; 207 } 208 String results_num = null; 209 String doc_ids = null; 210 try 211 { 212 logger.debug("IViaProxy, sending " + url_string); 213 BufferedReader reader = makeConnection(url_string); 214 results_num = reader.readLine(); 215 doc_ids = reader.readLine(); 216 217 } 218 catch (Exception e) 219 { 220 logger.error("exception happened during query"); 221 e.printStackTrace(); 222 return result; 223 } 224 225 if (results_num.startsWith("Resources: ")) 226 { 227 results_num = results_num.substring(11); 228 } 229 else 230 { 231 logger.error("badly formatted results line: " + results_num); 232 return result; 233 } 234 if (doc_ids.startsWith("Ids: ")) 235 { 236 doc_ids = doc_ids.substring(5).trim(); 237 } 238 else 239 { 240 logger.error("badly formatted docs line: " + doc_ids); 241 return result; 242 } 243 244 // get the num docs and add to a metadata list 245 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER); 246 result.appendChild(metadata_list); 247 248 // Add a metadata element specifying the number of matching documents 249 long numdocs = Long.parseLong(results_num); 250 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "" + numdocs); 251 String[] ids = doc_ids.split(" "); 252 253 for (int d = 0; d < ids.length; d++) 254 { 255 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 256 doc_node.setAttribute(GSXML.NODE_ID_ATT, ids[d]); 257 doc_node_list.appendChild(doc_node); 258 } 259 logger.debug("IViaProxy result:"); 260 logger.debug(this.converter.getString(result)); 261 return result; 262 263 } 264 265 protected Element processDocumentMetadataRetrieve(Element request) 266 { 267 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 268 result.setAttribute(GSXML.FROM_ATT, DOC_META_SERVICE); 269 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 270 271 // Get the parameters of the request 272 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER); 273 if (param_list == null) 274 { 275 logger.error("missing paramList.\n"); 276 return result; // Return the empty result 277 } 278 279 // The metadata information required 280 StringBuffer field_list = new StringBuffer(); 281 Element param = GSXML.getFirstElementChild(param_list);//(Element) param_list.getFirstChild(); 282 while (param != null) 283 { 284 // Identify the metadata information desired 285 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) 286 { 287 String metadata = GSXML.getValue(param); 288 if (isAcceptableMetadata(metadata)) 289 { 290 field_list.append(metadata); 291 field_list.append(","); 292 } 293 } 294 param = (Element) param.getNextSibling(); 295 } 296 297 if (field_list.length() == 0) 298 { 299 logger.error("no metadata specified.\n"); 300 return result; 301 } 302 303 // Get the documents 304 Element request_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER); 305 if (request_node_list == null) 306 { 307 logger.error("DocumentMetadataRetrieve request had no " + GSXML.DOC_NODE_ELEM + "List.\n"); 308 return result; 309 } 310 311 StringBuffer record_id_list = new StringBuffer(); 312 313 NodeList request_nodes = request_node_list.getChildNodes(); 314 for (int i = 0; i < request_nodes.getLength(); i++) 315 { 316 Element request_node = (Element) request_nodes.item(i); 317 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT); 318 record_id_list.append(node_id); 319 record_id_list.append(","); 320 } 321 322 // do the query to the iVia server 323 String url_string = ivia_server_url + "/cgi-bin/view_record_set?theme=gsdl3&record_id_list=" + record_id_list.toString() + "&field_list=" + field_list.toString(); 324 325 Element node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER); 326 result.appendChild(node_list); 327 try 328 { 329 BufferedReader reader = makeConnection(url_string); 330 String line; 331 while ((line = reader.readLine()) != null) 332 { 333 if (!line.startsWith("Record:")) 334 { 335 continue; 336 } 337 // the first line is the record 338 line = line.substring(8); 339 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 340 doc_node.setAttribute(GSXML.NODE_ID_ATT, line); 341 Element meta_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER); 342 doc_node.appendChild(meta_list); 343 while ((line = reader.readLine()) != null) 344 { 345 //metadata entry 346 int col_pos = line.indexOf(':'); 347 if (col_pos == -1) 348 { 349 // end of the metadata for this doc 350 break; 351 } 352 String name = line.substring(0, col_pos); 353 String value = line.substring(col_pos + 2); // includes a space 354 GSXML.addMetadata(this.doc, meta_list, name, value); 355 } 356 node_list.appendChild(doc_node); 357 358 } 359 } 360 catch (Exception e) 361 { 362 logger.error("exception happened"); 363 e.printStackTrace(); 364 } 365 logger.debug("IViaProxy: returning result: "); 366 logger.debug(this.converter.getPrettyString(result)); 367 return result; 368 369 } 370 371 protected Element processDocumentContentRetrieve(Element request) 372 { 373 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 374 result.setAttribute(GSXML.FROM_ATT, DOC_CONTENT_SERVICE); 375 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 376 377 // Get the request doc_list 378 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER); 379 if (query_doc_list == null) 380 { 381 logger.error("DocumentContentRetrieve request specified no doc nodes.\n"); 382 return result; 383 } 384 385 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER); 386 result.appendChild(doc_list); 387 388 // Get the documents 389 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list, GSXML.NODE_ID_ATT); 390 for (int i = 0; i < doc_ids.length; i++) 391 { 392 String doc_id = doc_ids[i]; 393 Element doc_node = getDocument(doc_id); 394 doc_list.appendChild(doc_node); 395 } 396 return result; 397 398 } 399 400 /** 401 * gets a document by sending a request to iVia, then processes it and 402 * creates a documentNode around the text 403 */ 404 protected Element getDocument(String doc_id) 405 { 406 407 String url_string = ivia_server_url + "/cgi-bin/view_record?theme=gsdl3&record_id=" + doc_id; 408 StringBuffer buffer = new StringBuffer(); 409 try 410 { 411 BufferedReader reader = makeConnection(url_string); 412 413 String line; 414 while ((line = reader.readLine()) != null) 415 { 416 buffer.append(line); 417 } 418 419 } 420 catch (Exception e) 421 { 422 logger.error("exception happened"); 423 e.printStackTrace(); 424 } 425 426 String node_content = buffer.toString(); 427 String escaped_content = GSXML.xmlSafe(node_content); 428 429 StringBuffer processed_content = new StringBuffer(escaped_content.length()); 430 processed_content.append("<nodeContent>"); 431 int pos = 0; 432 int lastpos = 0; 433 while ((pos = escaped_content.indexOf("<a ", lastpos)) != -1) 434 { 435 processed_content.append(escaped_content.substring(lastpos, pos)); 436 int endpos = escaped_content.indexOf("</a>", pos); 437 if (endpos == -1) 438 { 439 break; 440 } 441 String link = escaped_content.substring(pos, endpos + 10); 442 link = convertLink(link); 443 processed_content.append(link); 444 lastpos = endpos + 10; 445 } 446 processed_content.append(escaped_content.substring(lastpos)); // get the last bit 447 processed_content.append("</nodeContent>"); 448 317 449 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 318 doc_node.setAttribute(GSXML.NODE_ID_ATT, line); 319 Element meta_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); 320 doc_node.appendChild(meta_list); 321 while ((line = reader.readLine()) != null) { 322 //metadata entry 323 int col_pos = line.indexOf(':'); 324 if (col_pos == -1) { 325 // end of the metadata for this doc 326 break; 327 } 328 String name = line.substring(0,col_pos); 329 String value = line.substring(col_pos+2); // includes a space 330 GSXML.addMetadata(this.doc, meta_list, name, value); 331 } 332 node_list.appendChild(doc_node); 333 334 } 335 } catch (Exception e) { 336 logger.error("exception happened"); 337 e.printStackTrace(); 338 } 339 logger.debug("IViaProxy: returning result: "); 340 logger.debug(this.converter.getPrettyString(result)); 341 return result; 342 343 } 344 345 protected Element processDocumentContentRetrieve(Element request) { 346 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 347 result.setAttribute(GSXML.FROM_ATT, DOC_CONTENT_SERVICE); 348 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 349 350 // Get the request doc_list 351 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 352 if (query_doc_list == null) { 353 logger.error("DocumentContentRetrieve request specified no doc nodes.\n"); 354 return result; 355 } 356 357 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 358 result.appendChild(doc_list); 359 360 // Get the documents 361 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list, 362 GSXML.NODE_ID_ATT); 363 for (int i = 0; i < doc_ids.length; i++) { 364 String doc_id = doc_ids[i]; 365 Element doc_node = getDocument(doc_id); 366 doc_list.appendChild(doc_node); 367 } 368 return result; 369 370 } 371 372 373 /** gets a document by sending a request to iVia, then processes it and creates a documentNode around the text */ 374 protected Element getDocument(String doc_id) { 375 376 String url_string = ivia_server_url+"/cgi-bin/view_record?theme=gsdl3&record_id="+doc_id; 377 StringBuffer buffer = new StringBuffer(); 378 try { 379 BufferedReader reader = makeConnection(url_string); 380 381 String line; 382 while((line = reader.readLine())!= null) { 383 buffer.append(line); 384 } 385 386 387 } catch (Exception e) { 388 logger.error("exception happened"); 389 e.printStackTrace(); 390 } 391 392 String node_content = buffer.toString(); 393 String escaped_content = GSXML.xmlSafe(node_content); 394 395 StringBuffer processed_content = new StringBuffer(escaped_content.length()); 396 processed_content.append("<nodeContent>"); 397 int pos = 0; 398 int lastpos = 0; 399 while ((pos = escaped_content.indexOf("<a ", lastpos))!= -1) { 400 processed_content.append(escaped_content.substring(lastpos, pos)); 401 int endpos = escaped_content.indexOf("</a>", pos); 402 if (endpos == -1) { 403 break; 404 } 405 String link = escaped_content.substring(pos, endpos+10); 406 link = convertLink(link); 407 processed_content.append(link); 408 lastpos = endpos+10; 409 } 410 processed_content.append(escaped_content.substring(lastpos)); // get the last bit 411 processed_content.append("</nodeContent>"); 412 413 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); 414 doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_id); 415 416 Document content_doc = this.converter.getDOM(processed_content.toString()); 417 if (content_doc != null) { 418 Element content_element = content_doc.getDocumentElement(); 419 doc_node.appendChild(this.doc.importNode(content_element, true)); 420 } else { 421 logger.error("Couldn't parse the node content"); 422 } 423 return doc_node; 424 425 } 426 427 /** converts a url from an <a> element into a greenstone suitable one */ 428 protected String convertLink(String aref) { 429 430 if (aref.indexOf("href="http") != -1) { 431 return aref; // an external link 432 } 433 String type = "other"; 434 if (aref.indexOf("/cgi-bin/canned_search")!=-1) { 435 type="query"; 436 } else if (aref.indexOf("/cgi-bin/click_through") != -1) { 437 type = "external"; 438 } else if (aref.indexOf("/cgi-bin/view_record") != -1) { 439 type="document"; 440 } 441 442 int href_start = aref.indexOf("href="")+11; 443 int href_end = aref.indexOf(">", href_start); 444 String href = aref.substring(href_start, href_end); 445 String link_content = aref.substring(href_end+4, aref.length()-10); 446 447 if (type.equals("external")) { 448 // the external link is everything after the http at the end. 449 String address = href.substring(href.lastIndexOf("http")); 450 address = address.replaceAll("%3[aA]", ":"); 451 address = address.replaceAll("%2[fF]", "/"); 452 453 return "<a href=\""+address+"\">"+link_content+"</a>"; 454 } 455 if (type.equals("other")) { 456 return "other type of link ("+link_content+")"; 457 } 458 StringBuffer result = new StringBuffer(); 459 result.append("<link type='"); 460 result.append(type); 461 result.append("'"); 462 if (type.equals("query")) { 463 result.append(" service='TextQuery'"); 464 } 465 result.append(">"); 466 // add in the parameters 467 href = href.substring(href.indexOf("?")+1); 468 String [] params = href.split("&"); 469 for (int i=0; i<params.length; i++) { 470 String param = params[i]; 471 int eq_pos = param.indexOf("="); 472 if (eq_pos != -1) { 473 474 result.append("<param name='"+param.substring(0, eq_pos)+"' value='"+param.substring(eq_pos+1)+"'/>"); 475 } 476 } 477 result.append(link_content); 478 result.append("</link>"); 479 480 return result.toString(); 481 482 } 483 484 // iVia craps out if we ask for a metadata which is not valid. So need 485 // to make sure we only ask for acceptable fields. 486 protected boolean isAcceptableMetadata(String meta) { 487 String valid_metadata = ",title,url,ivia_description,keywords,subjects,"; 488 if (valid_metadata.indexOf(","+meta+",")!=-1) { 489 return true; 490 } 491 return false; 492 } 493 protected BufferedReader makeConnection(String url_string) { 494 BufferedReader reader = null; 495 try { 496 URL url = new URL(url_string); 497 HttpURLConnection connection = (HttpURLConnection)url.openConnection(); 498 InputStream input = connection.getInputStream(); 499 reader = new BufferedReader(new InputStreamReader(input)); 500 } catch (java.net.MalformedURLException e) { 501 502 logger.error("Malformed URL: "+url_string); 503 } catch (java.io.IOException e) { 504 logger.error("An error occurred during IO to url "+url_string); 505 } 506 return reader; 507 } 450 doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_id); 451 452 Document content_doc = this.converter.getDOM(processed_content.toString()); 453 if (content_doc != null) 454 { 455 Element content_element = content_doc.getDocumentElement(); 456 doc_node.appendChild(this.doc.importNode(content_element, true)); 457 } 458 else 459 { 460 logger.error("Couldn't parse the node content"); 461 } 462 return doc_node; 463 464 } 465 466 /** converts a url from an <a> element into a greenstone suitable one */ 467 protected String convertLink(String aref) 468 { 469 470 if (aref.indexOf("href="http") != -1) 471 { 472 return aref; // an external link 473 } 474 String type = "other"; 475 if (aref.indexOf("/cgi-bin/canned_search") != -1) 476 { 477 type = "query"; 478 } 479 else if (aref.indexOf("/cgi-bin/click_through") != -1) 480 { 481 type = "external"; 482 } 483 else if (aref.indexOf("/cgi-bin/view_record") != -1) 484 { 485 type = "document"; 486 } 487 488 int href_start = aref.indexOf("href="") + 11; 489 int href_end = aref.indexOf(">", href_start); 490 String href = aref.substring(href_start, href_end); 491 String link_content = aref.substring(href_end + 4, aref.length() - 10); 492 493 if (type.equals("external")) 494 { 495 // the external link is everything after the http at the end. 496 String address = href.substring(href.lastIndexOf("http")); 497 address = address.replaceAll("%3[aA]", ":"); 498 address = address.replaceAll("%2[fF]", "/"); 499 500 return "<a href=\"" + address + "\">" + link_content + "</a>"; 501 } 502 if (type.equals("other")) 503 { 504 return "other type of link (" + link_content + ")"; 505 } 506 StringBuffer result = new StringBuffer(); 507 result.append("<link type='"); 508 result.append(type); 509 result.append("'"); 510 if (type.equals("query")) 511 { 512 result.append(" service='TextQuery'"); 513 } 514 result.append(">"); 515 // add in the parameters 516 href = href.substring(href.indexOf("?") + 1); 517 String[] params = href.split("&"); 518 for (int i = 0; i < params.length; i++) 519 { 520 String param = params[i]; 521 int eq_pos = param.indexOf("="); 522 if (eq_pos != -1) 523 { 524 525 result.append("<param name='" + param.substring(0, eq_pos) + "' value='" + param.substring(eq_pos + 1) + "'/>"); 526 } 527 } 528 result.append(link_content); 529 result.append("</link>"); 530 531 return result.toString(); 532 533 } 534 535 // iVia craps out if we ask for a metadata which is not valid. So need 536 // to make sure we only ask for acceptable fields. 537 protected boolean isAcceptableMetadata(String meta) 538 { 539 String valid_metadata = ",title,url,ivia_description,keywords,subjects,"; 540 if (valid_metadata.indexOf("," + meta + ",") != -1) 541 { 542 return true; 543 } 544 return false; 545 } 546 547 protected BufferedReader makeConnection(String url_string) 548 { 549 BufferedReader reader = null; 550 try 551 { 552 URL url = new URL(url_string); 553 HttpURLConnection connection = (HttpURLConnection) url.openConnection(); 554 InputStream input = connection.getInputStream(); 555 reader = new BufferedReader(new InputStreamReader(input)); 556 } 557 catch (java.net.MalformedURLException e) 558 { 559 560 logger.error("Malformed URL: " + url_string); 561 } 562 catch (java.io.IOException e) 563 { 564 logger.error("An error occurred during IO to url " + url_string); 565 } 566 return reader; 567 } 508 568 509 569 }
Note:
See TracChangeset
for help on using the changeset viewer.