package org.greenstone.gsdl3.service; // Greenstone classes import org.greenstone.gsdl3.util.*; // XML classes import org.w3c.dom.Element; import org.w3c.dom.Document; import org.w3c.dom.NodeList; import java.util.HashMap; import java.io.File; import java.io.InputStream; import java.io.BufferedReader; import java.io.InputStreamReader; import java.io.IOException; import java.net.HttpURLConnection; import java.net.URLConnection; import java.net.URL; import java.net.Authenticator; import java.net.MalformedURLException; /** * Service rack that forwards the TextQuery, DocumentContentRetrieve and DocumentMetadataRetrieve services to a remote iVia server. * NOTE(review): this copy of the file has lost its line breaks, so every '//' comment swallows the code that follows it on the same physical line; the line structure must be restored before the class can compile. * * @author Katherine Don * @version $Revision: 9874 $ */ public class IViaProxy extends ServiceRack { // the services on offer // these strings must match what is found in the properties file protected static final String TEXT_QUERY_SERVICE = "TextQuery"; protected static final String DOC_CONTENT_SERVICE = "DocumentContentRetrieve"; protected static final String DOC_META_SERVICE = "DocumentMetadataRetrieve"; protected static final String QUERY_PARAM = "query"; protected static final String FIELD_PARAM = "fields"; // have standard gs param names for hits per page, and start page // these need to be mapped to iVia params protected static final String GS_HITS_PARAM = "hitsPerPage"; protected static final String IM_HITS_PARAM = "no_of_records_per_page"; protected static final String GS_START_PAGE_PARAM = "startPage"; protected static final String IM_START_PAGE_PARAM = "start_page_no"; protected String ivia_server_url = null; /** Sets up the proxy from its configuration. Reads the url attribute of the iViaServer child of info into ivia_server_url, adds the three services to short_service_info, and stores any search and display format elements found in extra_info in format_info_map. @param info element expected to contain an iViaServer child with a url attribute @param extra_info element searched for search/display format elements @return false if the iViaServer element or its url is missing, true otherwise */ public boolean configure(Element info, Element extra_info) { Element server_elem = (Element)GSXML.getChildByTagName(info, "iViaServer"); if (server_elem == null) { System.err.println("IViaProxy.configure error: no iViaServer element found"); return false; } ivia_server_url = server_elem.getAttribute("url"); if (ivia_server_url.equals("")) { System.err.println("IViaProxy.configure error: no url for the iViaServer element"); return false; } Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM); 
tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE); this.short_service_info.appendChild(tq_service); Element dc_service = this.doc.createElement(GSXML.SERVICE_ELEM); dc_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); dc_service.setAttribute(GSXML.NAME_ATT, DOC_CONTENT_SERVICE); this.short_service_info.appendChild(dc_service); Element dm_service = this.doc.createElement(GSXML.SERVICE_ELEM); dm_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); dm_service.setAttribute(GSXML.NAME_ATT, DOC_META_SERVICE); this.short_service_info.appendChild(dm_service); // // add some format info to service map if there is any String path = GSPath.appendLink(GSXML.SEARCH_ELEM, GSXML.FORMAT_ELEM); Element format = (Element) GSXML.getNodeByPath(extra_info, path); if (format != null) { this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(format, true)); } // look for document display format path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM); Element display_format = (Element)GSXML.getNodeByPath(extra_info, path); if (display_format != null) { this.format_info_map.put(DOC_CONTENT_SERVICE, this.doc.importNode(display_format, true)); // should we make a copy? 
} return true; } /** Builds the description element for one of the services on offer. For TextQuery the element gets display text (name, submit, description) and/or a parameter list (query, fields, hitsPerPage, startPage), depending on subset; both are added when subset is null. For the two retrieve services only the bare service element is returned. @param service the service name @param lang language passed to getTextString for the display strings @param subset which part of the description is wanted, or null for all of it @return the service element, or null if the service name is not recognised */ protected Element getServiceDescription(String service, String lang, String subset) { if (service.equals(TEXT_QUERY_SERVICE)) { Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM); tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE); if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER)) { tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(TEXT_QUERY_SERVICE+".name", lang))); tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(TEXT_QUERY_SERVICE+".submit", lang))); tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(TEXT_QUERY_SERVICE+".description", lang))); } if (subset == null || subset.equals(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER)) { Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); tq_service.appendChild(param_list); Element param = GSXML.createParameterDescription(this.doc, QUERY_PARAM, getTextString("param."+QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null); param_list.appendChild(param); String [] field_ids = {"kw", "au", "su", "ti", "de", "fu"}; String [] field_names = { getTextString("param."+FIELD_PARAM+".kw", lang), getTextString("param."+FIELD_PARAM+".au", lang), getTextString("param."+FIELD_PARAM+".su", lang), getTextString("param."+FIELD_PARAM+".ti", lang), getTextString("param."+FIELD_PARAM+".de", lang), getTextString("param."+FIELD_PARAM+".fu", lang) }; param = GSXML.createParameterDescription(this.doc, FIELD_PARAM, getTextString("param."+FIELD_PARAM, lang), GSXML.PARAM_TYPE_ENUM_MULTI, "kw,au,su,ti,de,fu", field_ids, field_names); param_list.appendChild(param); String [] hits_options = {"10", "30", "50"}; param = GSXML.createParameterDescription(this.doc, GS_HITS_PARAM, getTextString("param."+GS_HITS_PARAM, 
lang), GSXML.PARAM_TYPE_ENUM_SINGLE, "10", hits_options, hits_options); param_list.appendChild(param); param = GSXML.createParameterDescription(this.doc, GS_START_PAGE_PARAM, "", GSXML.PARAM_TYPE_INVISIBLE, "1", null, null); param_list.appendChild(param); } return tq_service; } if (service.equals(DOC_META_SERVICE)) { Element dm_service = this.doc.createElement(GSXML.SERVICE_ELEM); dm_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); dm_service.setAttribute(GSXML.NAME_ATT, DOC_META_SERVICE); return dm_service; } if (service.equals(DOC_CONTENT_SERVICE)) { Element dc_service = this.doc.createElement(GSXML.SERVICE_ELEM); dc_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); dc_service.setAttribute(GSXML.NAME_ATT, DOC_CONTENT_SERVICE); return dc_service; } return null; } /** Processes a TextQuery request: builds a /cgi-bin/canned_search URL on the iVia server from the query, fields, hitsPerPage and startPage parameters, then reads two response lines, expected to start with "Resources: " (match count) and "Ids: " (space-separated ids). Returns the response element; it is returned without results if the paramList or query is missing, the request throws, or either line is badly formatted. NOTE(review): the query and other values are appended to the URL without URL-encoding (only whitespace runs become '+'); readLine() returns null at end of stream, which would make the startsWith checks outside the try block throw; Long.parseLong is not guarded; the reader is not closed in the visible code. */ protected Element processTextQuery(Element request) { // Create a new (empty) result message Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE); result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); result.appendChild(doc_node_list); Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); if (param_list == null) { System.err.println("IViaProxy Error:: TextQuery request had no paramList."); return result; // Return the empty result } // Process the request parameters HashMap params = GSXML.extractParams(param_list, false); // Make sure a query has been specified String query = (String) params.get(QUERY_PARAM); if (query == null || query.equals("")) { return result; // Return the empty result } // tidy whitespace query = query.replaceAll("\\s+", "+"); String url_string = ivia_server_url+"/cgi-bin/canned_search?theme=gsdl3&query="+query; // check for fields String fields = (String) 
params.get(FIELD_PARAM); if (fields != null && !fields.equals("")) { url_string += "&fields="+fields; } //check for hits per page String hits_per_page = (String) params.get(GS_HITS_PARAM); if (hits_per_page != null && !hits_per_page.equals("")) { url_string += "&"+IM_HITS_PARAM+"="+hits_per_page; } // check for start page String start_page = (String) params.get(GS_START_PAGE_PARAM); if (start_page != null && !start_page.equals("")) { url_string += "&"+IM_START_PAGE_PARAM+"="+start_page; } String results_num = null; String doc_ids = null; try { ///ystem.err.println("IViaProxy, sending "+url_string); BufferedReader reader = makeConnection(url_string); results_num = reader.readLine(); doc_ids = reader.readLine(); } catch (Exception e) { System.err.println("IViaProxy.TextQuery Error: exception happened during query"); e.printStackTrace(); return result; } if (results_num.startsWith("Resources: ")) { results_num = results_num.substring(11); } else { System.err.println("IViaProxy.TextQuery Error: badly formatted results line: "+results_num); return result; } if (doc_ids.startsWith("Ids: ")) { doc_ids = doc_ids.substring(5).trim(); } else { System.err.println("IViaProxy.TextQuery Error: badly formatted docs line: "+doc_ids); return result; } // get the num docs and add to a metadata list Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); result.appendChild(metadata_list); // Add a metadata element specifying the number of matching documents long numdocs = Long.parseLong(results_num); GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+numdocs); String [] ids = doc_ids.split(" "); /* NOTE(review): source text is missing from this point: the loop header below breaks off after "d" and the code resumes inside a different method (it uses escaped_content and processed_content, which are not declared anywhere visible, and reports errors as IViaProxy.getDocument). Everything between a '<' and a later '>' appears to have been stripped; restore the lost code from version control. */ for (int d=0; d"); int pos = 0; int lastpos = 0; while ((pos = escaped_content.indexOf("<a ", lastpos))!= -1) { processed_content.append(escaped_content.substring(lastpos, pos)); int endpos = escaped_content.indexOf("</a>", pos); if (endpos == -1) { break; } String link = escaped_content.substring(pos, endpos+10); link = convertLink(link); 
processed_content.append(link); lastpos = endpos+10; } processed_content.append(escaped_content.substring(lastpos)); // get the last bit processed_content.append(""); Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM); doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_id); Document content_doc = this.converter.getDOM(processed_content.toString()); if (content_doc != null) { Element content_element = content_doc.getDocumentElement(); doc_node.appendChild(this.doc.importNode(content_element, true)); } else { System.err.println("IViaProxy.getDocument Error: Couldn't parse the node content"); } return doc_node; } /** Converts an anchor taken from the iVia content (aref) into a Greenstone-suitable link: anchors whose href starts with http are returned unchanged, click_through anchors become plain external links to the address found after the last "http" in the href, canned_search and view_record anchors are rebuilt from their query-string parameters (the generated markup is not visible in this copy), and anything else is replaced by a text placeholder. NOTE(review): this method is damaged in this copy - the "href="http" literal is not valid Java, the offsets 11, 4 and 10 do not match the visible literals (they would match the entity-escaped forms href=&quot; / &gt; / &lt;/a&gt;), and the parameter loop and the markup appended to result are truncated; restore from version control. */ protected String convertLink(String aref) { if (aref.indexOf("href="http") != -1) { return aref; // an external link } String type = "other"; if (aref.indexOf("/cgi-bin/canned_search")!=-1) { type="query"; } else if (aref.indexOf("/cgi-bin/click_through") != -1) { type = "external"; } else if (aref.indexOf("/cgi-bin/view_record") != -1) { type="document"; } int href_start = aref.indexOf("href="")+11; int href_end = aref.indexOf(">", href_start); String href = aref.substring(href_start, href_end); String link_content = aref.substring(href_end+4, aref.length()-10); if (type.equals("external")) { // the external link is everything after the http at the end. String address = href.substring(href.lastIndexOf("http")); address = address.replaceAll("%3[aA]", ":"); address = address.replaceAll("%2[fF]", "/"); return "<a href=\""+address+"\">"+link_content+"</a>"; } if (type.equals("other")) { return "other type of link ("+link_content+")"; } StringBuffer result = new StringBuffer(); result.append(""); // add in the parameters href = href.substring(href.indexOf("?")+1); String [] params = href.split("&"); for (int i=0; i"); } } result.append(link_content); result.append(""); return result.toString(); } // iVia craps out if we ask for a metadata which is not valid. 
So need // to make sure we only ask for acceptable fields. protected boolean isAcceptableMetadata(String meta) { String valid_metadata = ",title,url,ivia_description,keywords,subjects,"; if (valid_metadata.indexOf(","+meta+",")!=-1) { return true; } return false; } protected BufferedReader makeConnection(String url_string) { BufferedReader reader = null; try { URL url = new URL(url_string); HttpURLConnection connection = (HttpURLConnection)url.openConnection(); InputStream input = connection.getInputStream(); reader = new BufferedReader(new InputStreamReader(input)); } catch (java.net.MalformedURLException e) { System.err.println("IViaProxy Error: Malformed URL: "+url_string); } catch (java.io.IOException e) { System.err.println("IViaProxy Error: An error occurred during IO to url "+url_string); } return reader; } }