package org.greenstone.gsdl3.service;
// Greenstone classes
import org.greenstone.gsdl3.util.*;
// XML classes
import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import java.util.HashMap;
import java.io.File;
import java.io.InputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URLConnection;
import java.net.URL;
import java.net.Authenticator;
import java.net.MalformedURLException;
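/**
* A service rack that passes Greenstone 3 requests on to a remote iVia server.
* It registers the TextQuery, DocumentContentRetrieve and
* DocumentMetadataRetrieve services, and talks over HTTP to the server named by
* the url attribute of the iViaServer configuration element.
*
* @author Katherine Don
* @version $Revision: 9874 $
*/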
public class IViaProxy
extends ServiceRack {
// The services on offer.
// These strings must match what is found in the properties file.
protected static final String TEXT_QUERY_SERVICE = "TextQuery";
protected static final String DOC_CONTENT_SERVICE = "DocumentContentRetrieve";
protected static final String DOC_META_SERVICE = "DocumentMetadataRetrieve";
// Names of the request parameters read by the text query service.
protected static final String QUERY_PARAM = "query";
protected static final String FIELD_PARAM = "fields";
// Greenstone has standard param names for hits per page and start page
// (the GS_* constants); these need to be mapped to the iVia param names
// (the IM_* constants).
protected static final String GS_HITS_PARAM = "hitsPerPage";
protected static final String IM_HITS_PARAM = "no_of_records_per_page";
protected static final String GS_START_PAGE_PARAM = "startPage";
protected static final String IM_START_PAGE_PARAM = "start_page_no";
// Base URL of the iVia server. Null until configure() sets it from the url
// attribute of the iViaServer element.
protected String ivia_server_url = null;
/**
 * Configures this service rack from its configuration elements.
 * <p>
 * Reads the iVia server URL from the url attribute of the iViaServer child
 * of <code>info</code>, registers the three offered services in the short
 * service info, and stores any search / display format elements found in
 * <code>extra_info</code> in the format info map.
 *
 * @param info       configuration element expected to hold an iViaServer child
 * @param extra_info element searched for search and display format elements
 * @return true on success; false if the iViaServer element or its url
 *         attribute is missing
 */
public boolean configure(Element info, Element extra_info) {
    Element server = (Element) GSXML.getChildByTagName(info, "iViaServer");
    if (server == null) {
        System.err.println("IViaProxy.configure error: no iViaServer element found");
        return false;
    }
    ivia_server_url = server.getAttribute("url");
    if (ivia_server_url.length() == 0) {
        System.err.println("IViaProxy.configure error: no url for the iViaServer element");
        return false;
    }

    // Each row is {service type, service name}; the services are registered
    // in this order.
    String[][] offered = {
        { GSXML.SERVICE_TYPE_QUERY, TEXT_QUERY_SERVICE },
        { GSXML.SERVICE_TYPE_RETRIEVE, DOC_CONTENT_SERVICE },
        { GSXML.SERVICE_TYPE_RETRIEVE, DOC_META_SERVICE }
    };
    for (int s = 0; s < offered.length; s++) {
        Element service_elem = this.doc.createElement(GSXML.SERVICE_ELEM);
        service_elem.setAttribute(GSXML.TYPE_ATT, offered[s][0]);
        service_elem.setAttribute(GSXML.NAME_ATT, offered[s][1]);
        this.short_service_info.appendChild(service_elem);
    }

    // Search format info, if present, belongs to the text query service.
    String search_path = GSPath.appendLink(GSXML.SEARCH_ELEM, GSXML.FORMAT_ELEM);
    Element search_format = (Element) GSXML.getNodeByPath(extra_info, search_path);
    if (search_format != null) {
        this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(search_format, true));
    }

    // Document display format info, if present, belongs to the content
    // retrieval service.
    String display_path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
    Element doc_format = (Element) GSXML.getNodeByPath(extra_info, display_path);
    if (doc_format != null) {
        this.format_info_map.put(DOC_CONTENT_SERVICE, this.doc.importNode(doc_format, true));
    }
    return true;
}
/**
 * Builds the description element for one of the services offered here.
 * <p>
 * The two retrieve services are described by a bare service element carrying
 * only type and name. The text query service additionally gets its display
 * text and/or its parameter list, depending on <code>subset</code>.
 *
 * @param service name of the service to describe
 * @param lang    language passed to getTextString for the display strings
 * @param subset  null for the full description, or the name of the single
 *                list (display text list or param list) to include
 * @return the service element, or null if the service name is not one of
 *         the three offered
 */
protected Element getServiceDescription(String service, String lang, String subset) {
    final boolean is_query = service.equals(TEXT_QUERY_SERVICE);
    if (!is_query && !service.equals(DOC_META_SERVICE) && !service.equals(DOC_CONTENT_SERVICE)) {
        return null;
    }

    Element description = this.doc.createElement(GSXML.SERVICE_ELEM);
    description.setAttribute(GSXML.TYPE_ATT, is_query ? GSXML.SERVICE_TYPE_QUERY : GSXML.SERVICE_TYPE_RETRIEVE);
    description.setAttribute(GSXML.NAME_ATT, service);
    if (!is_query) {
        // retrieve services have nothing further to describe
        return description;
    }

    final boolean everything = (subset == null);

    if (everything || subset.equals(GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER)) {
        String name_text = getTextString(TEXT_QUERY_SERVICE + ".name", lang);
        description.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, name_text));
        String submit_text = getTextString(TEXT_QUERY_SERVICE + ".submit", lang);
        description.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, submit_text));
        String description_text = getTextString(TEXT_QUERY_SERVICE + ".description", lang);
        description.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, description_text));
    }

    if (everything || subset.equals(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER)) {
        Element params = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
        description.appendChild(params);

        // the free-text query itself
        String query_label = getTextString("param." + QUERY_PARAM, lang);
        params.appendChild(GSXML.createParameterDescription(this.doc, QUERY_PARAM, query_label, GSXML.PARAM_TYPE_STRING, null, null, null));

        // the fields to search in: a multi-valued enumeration, all selected by default
        String[] field_ids = {"kw", "au", "su", "ti", "de", "fu"};
        String[] field_names = new String[field_ids.length];
        for (int f = 0; f < field_ids.length; f++) {
            field_names[f] = getTextString("param." + FIELD_PARAM + "." + field_ids[f], lang);
        }
        String fields_label = getTextString("param." + FIELD_PARAM, lang);
        params.appendChild(GSXML.createParameterDescription(this.doc, FIELD_PARAM, fields_label, GSXML.PARAM_TYPE_ENUM_MULTI, "kw,au,su,ti,de,fu", field_ids, field_names));

        // hits per page: the option values double as their own display names
        String[] hits_options = {"10", "30", "50"};
        String hits_label = getTextString("param." + GS_HITS_PARAM, lang);
        params.appendChild(GSXML.createParameterDescription(this.doc, GS_HITS_PARAM, hits_label, GSXML.PARAM_TYPE_ENUM_SINGLE, "10", hits_options, hits_options));

        // start page: invisible parameter, defaults to the first page
        params.appendChild(GSXML.createParameterDescription(this.doc, GS_START_PAGE_PARAM, "", GSXML.PARAM_TYPE_INVISIBLE, "1", null, null));
    }
    return description;
}
/**
* Processes a TextQuery request by sending the query to the iVia server.
* <p>
* The visible code builds a canned_search URL from the request parameters
* (query, fields, hits per page, start page), reads two lines of the reply
* (expected to start with "Resources: " and "Ids: "), and records the number
* of matches as numDocsMatched metadata. A response holding only an empty
* document node list is returned when the request has no parameter list or
* no query, when the connection fails, or when either reply line is badly
* formatted.
* <p>
* NOTE(review): the text of this method is damaged from the loop over the
* returned ids onwards; see the note at that point.
*
* @param request the request element; its parameter list child is read
* @return the response element
*/
protected Element processTextQuery(Element request) {
// Create a new (empty) result message
Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
result.appendChild(doc_node_list);
Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
if (param_list == null) {
System.err.println("IViaProxy Error:: TextQuery request had no paramList.");
return result; // Return the empty result
}
// Process the request parameters
HashMap params = GSXML.extractParams(param_list, false);
// Make sure a query has been specified
String query = (String) params.get(QUERY_PARAM);
if (query == null || query.equals("")) {
return result; // Return the empty result
}
// tidy whitespace: each run of whitespace becomes a single '+'
// NOTE(review): no other URL encoding is applied to the query before it is
// appended to the URL below.
query = query.replaceAll("\\s+", "+");
String url_string = ivia_server_url+"/cgi-bin/canned_search?theme=gsdl3&query="+query;
// check for fields
String fields = (String) params.get(FIELD_PARAM);
if (fields != null && !fields.equals("")) {
url_string += "&fields="+fields;
}
//check for hits per page (Greenstone name mapped to the iVia name)
String hits_per_page = (String) params.get(GS_HITS_PARAM);
if (hits_per_page != null && !hits_per_page.equals("")) {
url_string += "&"+IM_HITS_PARAM+"="+hits_per_page;
}
// check for start page (Greenstone name mapped to the iVia name)
String start_page = (String) params.get(GS_START_PAGE_PARAM);
if (start_page != null && !start_page.equals("")) {
url_string += "&"+IM_START_PAGE_PARAM+"="+start_page;
}
// The reply is read as two lines: the number of results, then the ids.
String results_num = null;
String doc_ids = null;
try {
// debug: System.err.println("IViaProxy, sending "+url_string);
// makeConnection returns null on failure; the resulting
// NullPointerException is handled by the catch below.
// NOTE(review): the reader is not closed in the visible code.
BufferedReader reader = makeConnection(url_string);
results_num = reader.readLine();
doc_ids = reader.readLine();
} catch (Exception e) {
System.err.println("IViaProxy.TextQuery Error: exception happened during query");
e.printStackTrace();
return result;
}
// NOTE(review): readLine() returns null at end of stream, so a reply with
// fewer than two lines would make the startsWith calls below throw.
if (results_num.startsWith("Resources: ")) {
results_num = results_num.substring(11);
} else {
System.err.println("IViaProxy.TextQuery Error: badly formatted results line: "+results_num);
return result;
}
if (doc_ids.startsWith("Ids: ")) {
doc_ids = doc_ids.substring(5).trim();
} else {
System.err.println("IViaProxy.TextQuery Error: badly formatted docs line: "+doc_ids);
return result;
}
// get the num docs and add to a metadata list
Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
result.appendChild(metadata_list);
// Add a metadata element specifying the number of matching documents
// NOTE(review): parseLong throws NumberFormatException if the text after
// "Resources: " is not a plain number; that is not caught here.
long numdocs = Long.parseLong(results_num);
GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+numdocs);
String [] ids = doc_ids.split(" ");
// NOTE(review): the source text appears to be truncated from here on. The
// loop header below is incomplete, and the statements after it use names
// (escaped_content, processed_content, doc_id) that are not declared in the
// visible code, so they presumably belong to a later method whose start was
// lost. Restore this region from the original file before changing it.
for (int d=0; d");
int pos = 0;
int lastpos = 0;
while ((pos = escaped_content.indexOf("<a ", lastpos))!= -1) {
processed_content.append(escaped_content.substring(lastpos, pos));
int endpos = escaped_content.indexOf("</a>", pos);
if (endpos == -1) {
break;
}
String link = escaped_content.substring(pos, endpos+10);
link = convertLink(link);
processed_content.append(link);
lastpos = endpos+10;
}
processed_content.append(escaped_content.substring(lastpos)); // get the last bit
processed_content.append("");
Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_id);
Document content_doc = this.converter.getDOM(processed_content.toString());
if (content_doc != null) {
Element content_element = content_doc.getDocumentElement();
doc_node.appendChild(this.doc.importNode(content_element, true));
} else {
System.err.println("IViaProxy.getDocument Error: Couldn't parse the node content");
}
return doc_node;
}
/**
* Converts a link taken from iVia content into one suitable for Greenstone.
* <p>
* From the visible code: the link is classified by the cgi-bin path it
* contains (canned_search is "query", click_through is "external",
* view_record is "document", anything else is "other"). An "external" link
* is rewritten to point directly at the address that follows the last
* "http" in its href, with %3A and %2F decoded to ':' and '/'. An "other"
* link is replaced by the text "other type of link (...)". The remaining
* types have the parameters of their href processed one by one.
* <p>
* NOTE(review): the text of this method appears to be damaged. Several
* string literals (the href tests and the text appended to the result) look
* corrupted, the offsets 11, 4 and 10 do not match the lengths of the
* literals as shown, and the body of the parameter loop is missing. Restore
* the method from the original file before changing it.
*
* @param aref the text of the link to convert
* @return the converted link text
*/
protected String convertLink(String aref) {
if (aref.indexOf("href="http") != -1) {
return aref; // an external link
}
String type = "other";
if (aref.indexOf("/cgi-bin/canned_search")!=-1) {
type="query";
} else if (aref.indexOf("/cgi-bin/click_through") != -1) {
type = "external";
} else if (aref.indexOf("/cgi-bin/view_record") != -1) {
type="document";
}
int href_start = aref.indexOf("href="")+11;
int href_end = aref.indexOf(">", href_start);
String href = aref.substring(href_start, href_end);
String link_content = aref.substring(href_end+4, aref.length()-10);
if (type.equals("external")) {
// the external link is everything after the http at the end.
String address = href.substring(href.lastIndexOf("http"));
address = address.replaceAll("%3[aA]", ":");
address = address.replaceAll("%2[fF]", "/");
return "<a href=\""+address+"\">"+link_content+"</a>";
}
if (type.equals("other")) {
return "other type of link ("+link_content+")";
}
StringBuffer result = new StringBuffer();
result.append("");
// add in the parameters
href = href.substring(href.indexOf("?")+1);
String [] params = href.split("&");
for (int i=0; i");
}
}
result.append(link_content);
result.append("");
return result.toString();
}
// iVia fails if it is asked for a metadata field that is not valid, so
// requests must be limited to the acceptable field names listed here.
protected boolean isAcceptableMetadata(String meta) {
    // Wrapping the name in commas stops a fragment such as "tit" from
    // matching part of a longer entry in the list.
    final String wanted = "," + meta + ",";
    final String known = ",title,url,ivia_description,keywords,subjects,";
    return known.indexOf(wanted) >= 0;
}
/**
 * Opens an HTTP connection to the given URL and returns a reader over the
 * body of the response.
 * <p>
 * Every failure is reported on System.err and signalled by a null return.
 * A URL that does not yield an HTTP connection (for example a file: URL) is
 * now rejected in the same way; previously the cast to HttpURLConnection
 * threw a ClassCastException that bypassed this method's error handling.
 *
 * @param url_string the complete URL to fetch
 * @return a reader over the response, or null if the URL is malformed, is
 *         not an HTTP(S) URL, or an IO error occurs. The reader is handed to
 *         the caller unclosed.
 */
protected BufferedReader makeConnection(String url_string) {
    try {
        URL url = new URL(url_string);
        // openConnection() only creates the connection object; no network
        // traffic happens until getInputStream() is called.
        URLConnection connection = url.openConnection();
        if (!(connection instanceof HttpURLConnection)) {
            System.err.println("IViaProxy Error: Not an HTTP URL: "+url_string);
            return null;
        }
        InputStream input = connection.getInputStream();
        // NOTE(review): the response is decoded with the platform default
        // charset; confirm which encoding the iVia server actually sends.
        return new BufferedReader(new InputStreamReader(input));
    } catch (MalformedURLException e) {
        System.err.println("IViaProxy Error: Malformed URL: "+url_string);
    } catch (IOException e) {
        // include the cause so that the failure can be diagnosed from the log
        System.err.println("IViaProxy Error: An error occurred during IO to url "+url_string+": "+e);
    }
    return null;
}
}