package org.greenstone.mat.servlet;

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.servlet.*;
import javax.servlet.http.*;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.w3c.dom.*;
import org.greenstone.gsdl3.util.GlobalProperties;
import org.greenstone.gsdl3.util.GSPath;
import java.net.Authenticator;
import java.net.PasswordAuthentication;

/**
 * Servlet front end for the metadata analysis tool.
 *
 * <p>{@link #doGet} prints the landing page. {@link #doPost} dispatches on the
 * request parameters that are present:</p>
 * <ul>
 *   <li>{@code metadataPrefix}: download OAI records ({@code downloadCollection});</li>
 *   <li>{@code matShell}: build the collection ({@code buildCollection});</li>
 *   <li>{@code collName}: generate the report ({@code analyzeCollection});</li>
 *   <li>otherwise: validate the submitted {@code oaiurl} and query the remote
 *       server with the OAI verbs {@code Identify} and {@code ListMetadataFormats}.</li>
 * </ul>
 *
 * <p>NOTE(review): many literals printed by this class are empty or consist only
 * of line breaks. They are reproduced exactly as found; presumably they once
 * carried page markup (forms, hidden fields, tables) - confirm against the
 * upstream copy before deploying. Locals that are computed but not printed
 * (for example {@code host}, {@code id}, {@code matShell2}) are kept for the
 * same reason.</p>
 *
 * <p>NOTE(review): per-request values are stored in instance fields
 * ({@code port_number}, {@code oaiPrefix}, {@code gsdl3Home}); confirm this is
 * acceptable for how the container shares this servlet between requests.</p>
 */
public class MatServlet extends HttpServlet {

    /** A record limit is accepted when it is one to five decimal digits. */
    private static final Pattern MAX_RECORDS_PATTERN = Pattern.compile("[0-9]{1,5}");

    private int port_number = 0;
    private String oaiPrefix = "";
    private String titleString;
    private String h1String;
    private String maxRecord;
    private GlobalProperties globalProperty;
    private String fileSeparator;
    private String cssString;
    private String headerString;
    private String javaScript;
    private String headerString2;
    private String gsdl3Home;
    private String logoURL;
    private String description;
    private String hostName;
    /** Name of the system property that receives the proxy host. */
    private String proxyHost = "http.proxyHost";
    /** Name of the system property that receives the proxy port. */
    private String proxyPort = "http.proxyPort";
    private String proxyHostContent = "";
    private String proxyPortContent = "";
    private String proxyUserName = "";
    private String proxyUserPassword = "";

    /**
     * Prints the landing page.
     *
     * @param request  the incoming request; only its local port is read
     * @param response the response the page is written to
     * @throws ServletException declared by the servlet contract
     * @throws IOException      if the response writer cannot be obtained
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        port_number = request.getLocalPort();
        loadRuntimeSettings();
        response.setContentType("text/html");
        response.setHeader("pragma", "no-cache");
        PrintWriter out = response.getWriter();
        out.println("");
        out.println(headerString);
        out.println("");
        out.println("");
        out.println(h1String);
        out.println("\n\n" + description + "\n\n");
        out.println("\n");
        out.println("\n\nOAI URL:\n\n");
        out.println("\n\n");
        out.println("\n");
        out.println("");
        out.println("");
        out.println("");
        out.println("");
        out.close();
    }

    /**
     * Runs one step of the workflow, chosen by which request parameter is present
     * (see the class comment for the dispatch order).
     *
     * @param req the incoming request
     * @param res the response the page is written to
     * @throws ServletException declared by the servlet contract
     * @throws IOException      if the response writer cannot be obtained or a
     *                          connection to the remote OAI server fails
     */
    protected void doPost(HttpServletRequest req, HttpServletResponse res)
            throws ServletException, IOException {
        port_number = req.getLocalPort();
        loadRuntimeSettings();
        res.setContentType("text/html");
        res.setHeader("pragma", "no-cache");
        PrintWriter out = res.getWriter();
        out.println("");

        if (req.getParameter("metadataPrefix") != null) {
            oaiPrefix = req.getParameter("metadataPrefix");
            String maxRecords = req.getParameter("maxrecords");
            // A missing, malformed or non-positive limit falls back to the default.
            boolean usable = false;
            if (maxRecords != null) {
                Matcher ma = MAX_RECORDS_PATTERN.matcher(maxRecords);
                usable = ma.matches() && Integer.parseInt(maxRecords) > 0;
            }
            out.println(headerString2);
            out.println("");
            out.println(h1String);
            out.println("\n\nPlease wait ....\nIt's downloading OAI records\n\n");
            out.println("");
            downloadCollection(out, req, res, oaiPrefix, usable ? maxRecords : maxRecord);
        } else if (req.getParameter("matShell") != null) {
            out.println(headerString2);
            out.println("");
            out.println(h1String);
            out.println("\n\nPlease wait ...\n\nIt's building collection now.\n\n");
            out.println("");
            buildCollection(out,
                    req.getParameter("matShell"),
                    req.getParameter("collectionName"),
                    req.getParameter("collectionURL"),
                    req.getParameter("oaiPrefix"));
        } else if (req.getParameter("collName") != null) {
            out.println(headerString);
            out.println("");
            out.println(h1String);
            analyzeCollection(out,
                    req.getParameter("collName"),
                    req.getParameter("collURL"),
                    req.getParameter("collHost"),
                    req.getParameter("oaiPrefix"));
        } else {
            out.println(headerString);
            out.println("");
            out.println(h1String);
            checkRepository(out, req.getParameter("oaiurl"));
        }
    }

    /**
     * Validates the submitted OAI URL, then reports the repository identity and
     * the metadata prefixes it offers. The writer is closed only when both remote
     * queries ran to the end; the early exits leave it open.
     *
     * @param out          writer for the response page
     * @param submittedUrl value of the {@code oaiurl} parameter; null is treated
     *                     as empty and is rejected by the host checks below
     * @throws IOException if a connection to the remote server cannot be opened
     */
    private void checkRepository(PrintWriter out, String submittedUrl) throws IOException {
        String oaiURLString = submittedUrl == null ? "" : submittedUrl;
        if (!oaiURLString.startsWith("http://")) {
            oaiURLString = "http://" + oaiURLString; // add on protocol if missing
        }

        // Check that Java can make a URL from the string.
        URL oaiURL;
        try {
            oaiURL = new URL(oaiURLString);
        } catch (MalformedURLException e) {
            out.println("\n\nMalformed URL Exception caught: " + e.getMessage() + "\n\n");
            out.println("\n\nThe system cannot recognise the URL you have entered.\n\n");
            return; // go no further
        }

        // A restriction on Waikato hosts used to sit here; it was already disabled.
        String host = oaiURL.getHost();

        // Prevent bare machine names from being allowed through.
        // Approach 1: explicit blacklisting.
        if (host.equals("smith") || host.equals("wesson")) {
            out.println("\n\nThis service cannot be used to access these URLs\n\n");
            return;
        }
        // Approach 2: require at least one '.' in the host. A name such as
        // "jones.cs" still gets through; suffix whitelisting is impractical.
        if (host.indexOf(".") == -1) {
            out.println("\n\nThis service cannot be used to access URLs of this form.\n\n");
            return;
        }

        // Route through the configured proxy only when both host and port are set.
        if (!proxyHostContent.equals("") && !proxyPortContent.equals("")) {
            System.setProperty(proxyHost, proxyHostContent);
            System.setProperty(proxyPort, proxyPortContent);
        }
        if (!proxyUserName.equals("") && !proxyUserPassword.equals("")) {
            Authenticator.setDefault(new ProxyAuthenticator(proxyUserName, proxyUserPassword));
        }

        if (!reportIdentify(out, oaiURL, oaiURLString)) {
            return;
        }
        if (!reportMetadataFormats(out, oaiURL)) {
            return;
        }
        out.print("");
        out.close();
    }

    /** Builds the query suffix for an OAI verb, adding '?' unless the path ends with one. */
    private static String verbQuery(URL url, String verb) {
        String query = "";
        if (!url.getPath().endsWith("?")) {
            query += "?";
        }
        return query + "verb=" + verb;
    }

    /**
     * Issues {@code verb=Identify} and prints the repository name and base URL.
     *
     * @return false when the response is not a usable OAI-PMH Identify document
     *         and processing must stop; true otherwise, including when parsing
     *         failed and the failure was printed
     */
    private boolean reportIdentify(PrintWriter out, URL url, String oaiURLString)
            throws IOException {
        URL identifyURL = new URL(url.toString() + verbQuery(url, "Identify"));
        HttpURLConnection connection = (HttpURLConnection) identifyURL.openConnection();
        connection.connect();
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document identifyDocument = builder.parse(connection.getInputStream());

            // Compare by value: the parser is not required to return interned names.
            Element oaiElement = identifyDocument.getDocumentElement();
            if (!"OAI-PMH".equals(oaiElement.getTagName())) {
                out.println("\n\nError: OAI-PMH element not found..exiting\n\n");
                return false;
            }

            NodeList identifyNodeList = identifyDocument.getElementsByTagName("Identify");
            if (identifyNodeList.getLength() != 1) {
                out.println("\n\nError: Identify node not found... exiting\n\n");
                return false;
            }

            // NOTE(review): in the text as found, the three labelled lines below sat
            // after the loop and referred to loop-local names; they are placed back
            // in the branches that define those names. Confirm against upstream.
            NodeList identifyChildList = identifyNodeList.item(0).getChildNodes();
            out.println("");
            for (int i = 0; i < identifyChildList.getLength(); i++) {
                Node child = identifyChildList.item(i);
                if ("repositoryName".equals(child.getNodeName())) {
                    out.println("Repository Name: " + getTextContent(child));
                }
                if ("baseURL".equals(child.getNodeName())) {
                    String baseURL = getTextContent(child);
                    if (baseURL.startsWith("http://")) {
                        out.println("Base URL:" + baseURL);
                    } else {
                        out.println("Base URL:" + oaiURLString);
                    }
                }
            }
            out.println("");
        } catch (SAXParseException spe) {
            out.println(spe.getMessage());
        } catch (SAXException sxe) {
            out.println(sxe.toString());
        } catch (ParserConfigurationException pce) {
            out.println(pce.toString());
        } catch (IOException ioe) {
            out.println(ioe.toString());
        }
        return true;
    }

    /**
     * Issues {@code verb=ListMetadataFormats} and prints the prefix chooser.
     *
     * @return false when the server reports no metadata prefixes and processing
     *         must stop; true otherwise, including when parsing failed and the
     *         failure was printed
     */
    private boolean reportMetadataFormats(PrintWriter out, URL url) throws IOException {
        URL metadataFormatsURL = new URL(url.toString() + verbQuery(url, "ListMetadataFormats"));
        URLConnection connection2 = metadataFormatsURL.openConnection();
        connection2.connect(); // previously re-connected the Identify connection by mistake
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document document2 = builder.parse(connection2.getInputStream());
            NodeList prefixList = document2.getElementsByTagName("metadataPrefix");
            if (prefixList.getLength() == 0) {
                out.println("\n\nError: no metadata prefixes found... exiting\n\n");
                return false;
            }

            out.println("\n\nChoose one metadata prefix to use:\n\n");
            out.println("\n");
            out.println("");
            // NOTE(review): in the text as found, the statements using the loop
            // index sat after the loop's closing brace; they are placed back inside
            // it. Both prefix branches print the same empty literal as found.
            for (int i = 0; i < prefixList.getLength(); i++) {
                out.println("");
                String id = "radioID" + i; // not printed in the text as found
                String prefix = getTextContent(prefixList.item(i));
                if (prefix.equals("oai_dc")) {
                    out.println("");
                } else {
                    out.println("");
                }
                out.print("\n");
            }
            out.println("\n");
            out.println("\n");
            // need to pass oaiurl through as well (again)
            out.println("\n");
            out.println("Max records:\n");
            // submit button
            out.println("\n\n");
            out.println("\n");
            out.println("\n\nWarning: Generating the statistics and visualization will take some time:\n\n");
            out.println("\n\nThis tool is designed to work with Dublin Core metadata: note that the mapping of qualified Dublin Core to simple Dublin Core (as in oai_dc) may affect the results.");
        } catch (SAXParseException spe) {
            out.println(spe.getMessage());
        } catch (SAXException sxe) {
            out.println(sxe.toString());
        } catch (ParserConfigurationException pce) {
            out.println(pce.toString());
        } catch (IOException ioe) {
            out.println(ioe.toString());
        }
        return true;
    }

    /**
     * Concatenates the node's own value, each child's value and, for children
     * that have children of their own, their text gathered the same way.
     *
     * @param node the node to read
     * @return the gathered text; empty when no node in the subtree has a value
     */
    private static String getTextContent(Node node) {
        StringBuffer content = new StringBuffer();
        if (node.getNodeValue() != null) {
            content.append(node.getNodeValue());
        }
        NodeList nodes = node.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            Node child = nodes.item(i);
            if (child.getNodeValue() != null) {
                content.append(child.getNodeValue());
            }
            if (child.getChildNodes().getLength() > 0) {
                content.append(getTextContent(child));
            }
        }
        return content.toString();
    }

    /**
     * Runs the collection build and reports whether it succeeded, then closes
     * the writer.
     *
     * <p>SECURITY NOTE(review): outside Windows, {@code matShell2} is taken from
     * the request and executed verbatim through {@code sh -c}. Anyone who can
     * post to this servlet can therefore run arbitrary commands. This is flagged
     * rather than changed here because rebuilding the command server-side would
     * alter the record limit that is passed; it should be addressed.</p>
     *
     * @param out          writer for the response page
     * @param matShell2    shell command to run (non-Windows only)
     * @param collName     collection name
     * @param oaiURLString OAI URL of the repository
     * @param oaiPrefix    metadata prefix
     * @throws ServletException declared for symmetry with the servlet entry points
     * @throws IOException      declared for symmetry with the servlet entry points
     */
    protected void buildCollection(PrintWriter out, String matShell2, String collName,
            String oaiURLString, String oaiPrefix) throws ServletException, IOException {
        out.println("\n\n");
        out.println("");
        out.println("\n\nBuilding collection...\n\n");
        String host = "http://" + hostName + ":" + port_number + "/mat/"; // not printed in the text as found
        out.println("\n");
        out.println("\n");
        out.println("\n");
        out.println("\n");
        out.println("\n");
        out.println("\n");
        out.println("\n\n" + matShell2 + "\n\n");
        out.flush();

        String gsdl3Root = resolveGsdl3Root();
        Process p2;
        if (isWindows()) {
            String matDir = gsdl3Root + fileSeparator + "ext" + fileSeparator + "mat";
            File wd = new File(matDir + fileSeparator + "bin" + fileSeparator + "script");
            String collectDir = gsdl3Home + fileSeparator + "sites" + fileSeparator
                    + "localsite" + fileSeparator + "collect";
            String logFile = matDir + fileSeparator + "tmp" + fileSeparator + "log.txt";
            String cacheDir = matDir + fileSeparator + "tmp" + fileSeparator + collName;
            String[] arrays = new String[] {
                "cmd", "/c", "start", "/MIN", "mat-colbuild.bat",
                collName, oaiURLString, cacheDir, "100", oaiPrefix,
                gsdl3Root, collectDir, logFile
            };
            p2 = processBatch(arrays, out, wd);
            out.print("\n");
        } else {
            p2 = processShell(matShell2, out);
            if (p2 != null && p2.exitValue() == 0) {
                out.println("\n\nCollection built.\n\n");
            } else {
                out.println("\n\nCollection not built.\n\n");
            }
            out.print("\n");
        }
        // The reference used to be nulled before destroy(), which always threw.
        if (p2 != null) {
            p2.destroy();
        }
        out.close();
    }

    /**
     * Downloads OAI records into a freshly named collection by running the
     * download script, reports the outcome and closes the writer.
     *
     * @param out       writer for the response page
     * @param req       request supplying {@code oaiURL} and {@code metadataPrefix}
     * @param res       unused
     * @param oaiPrefix unused; the prefix is re-read from the request
     * @param Records   maximum number of records to download
     * @throws ServletException declared for symmetry with the servlet entry points
     * @throws IOException      declared for symmetry with the servlet entry points
     */
    private void downloadCollection(PrintWriter out, HttpServletRequest req,
            HttpServletResponse res, String oaiPrefix, String Records)
            throws ServletException, IOException {
        String oaiURLString = req.getParameter("oaiURL");
        String metadataprefix = req.getParameter("metadataPrefix");
        String collName = generateCollName();
        String host = "http://" + hostName + ":" + port_number + "/"; // not printed in the text as found
        out.println("\n\n");
        out.println("");
        out.println("\n\nDownloading OAI documents...\n\n");
        out.flush();

        String gsdl3Root = resolveGsdl3Root();
        String maxRecords = Records;
        String matDir = gsdl3Root + fileSeparator + "ext" + fileSeparator + "mat";
        String scriptDir = matDir + fileSeparator + "bin" + fileSeparator + "script";
        String cacheDir = matDir + fileSeparator + "tmp" + fileSeparator + collName;
        String gs3Root = gsdl3Root;
        String collectDir = gsdl3Home + fileSeparator + "sites" + fileSeparator
                + "localsite" + fileSeparator + "collect";
        String logFile = matDir + fileSeparator + "tmp" + fileSeparator + "log.txt";

        // Proxy options travel as one double-quoted argument; a single space means none.
        String proxy_settings = " ";
        if (!proxyHostContent.equals("") && !proxyPortContent.equals("")) {
            String options = "-proxy_on -proxy_host " + proxyHostContent
                    + " -proxy_port " + proxyPortContent;
            if (!proxyUserName.equals("") && !proxyUserPassword.equals("")) {
                options += " -user_name " + proxyUserName
                        + " -user_password " + proxyUserPassword;
            }
            proxy_settings = "\"" + options + "\"";
        }

        if (isWindows()) {
            File wd = new File(scriptDir);
            String[] arrays = new String[] {
                "cmd", "/c", "start", "/MIN", "mat-colbuild-download.bat",
                collName, oaiURLString, cacheDir, maxRecords, metadataprefix,
                gs3Root, collectDir, logFile, proxy_settings
            };
            Process p = processBatch(arrays, out, wd);
            out.flush();
            if (p != null) {
                p.destroy();
            }
        } else {
            // The URL and prefix come from the request, so they are single-quoted
            // before being handed to "sh -c"; the script receives the same values.
            String matShell = scriptDir + fileSeparator + "mat-colbuild-download.bash "
                    + collName + " " + shellQuote(oaiURLString) + " " + cacheDir + " "
                    + maxRecords + " " + shellQuote(metadataprefix) + " " + gs3Root + " "
                    + collectDir + " " + logFile + " " + proxy_settings;
            out.println("\n\n" + matShell + "\n\n");
            out.flush();
            Process p = processShell(matShell, out);
            if (p != null && p.exitValue() == 0) {
                out.println("\n\nCollection downloaded.\n\n");
            } else {
                out.println("\n\nCollection not downloaded properly.\n\n");
            }
            out.flush();
            if (p != null) {
                p.destroy();
            }
        }

        // NOTE(review): the build command is assembled here but not printed in the
        // text as found; presumably it used to be sent to the browser and returned
        // as the "matShell" parameter - see the security note on buildCollection.
        String matShell2 = scriptDir + fileSeparator + "mat-colbuild.bash "
                + collName + " " + oaiURLString + " " + cacheDir + " " + maxRecords + " "
                + metadataprefix + " " + gs3Root + " " + collectDir + " " + logFile;
        out.println("\n");
        out.println("");
        out.println("");
        out.println("");
        out.println("");
        out.println("");
        out.println("\n");
        out.println("\n");
        out.print("");
        out.flush();
        out.close();
    }

    /** Wraps a value in single quotes for {@code sh}, escaping embedded single quotes. */
    private static String shellQuote(String value) {
        return "'" + String.valueOf(value).replaceAll("'", "'\\\\''") + "'";
    }

    /** Produces a random 7 letter collection name using the letters a-z. */
    private String generateCollName() {
        Random random = new Random();
        StringBuffer message = new StringBuffer();
        int offset = 97; // = "a"
        for (int i = 0; i < 7; i++) {
            message.append((char) (random.nextInt(26) + offset));
        }
        return message.toString();
    }

    /**
     * Runs a command in the given working directory, relays its output to the
     * page and waits for it to finish.
     *
     * @param command program and arguments
     * @param out     writer for the response page
     * @param wd      working directory for the process
     * @return the finished process, or null when it could not be run
     */
    private Process processBatch(String[] command, PrintWriter out, File wd) {
        try {
            return runAndRelay(command, wd, out);
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt(); // keep the interrupt visible to the container
            ie.printStackTrace();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return null;
    }

    /**
     * Runs a command line through {@code sh -c}, relays its output to the page
     * and waits for it to finish so that callers may read the exit value.
     *
     * @param command the shell command line
     * @param out     writer for the response page
     * @return the finished process, or null when it could not be run
     */
    private Process processShell(String command, PrintWriter out) {
        try {
            return runAndRelay(new String[] {"sh", "-c", command}, null, out);
        } catch (IOException e) {
            out.println("exception happened - here's what I know: ");
            out.println(e.toString());
            out.flush();
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt(); // keep the interrupt visible to the container
            out.println("exception happened - here's what I know: ");
            out.println(ie.toString());
            out.flush();
        }
        return null;
    }

    /**
     * Starts the process, drains standard output and then standard error while
     * flushing the page after every line, prints both captures and waits for
     * the process to exit.
     */
    private Process runAndRelay(String[] command, File wd, PrintWriter out)
            throws IOException, InterruptedException {
        Process proc = Runtime.getRuntime().exec(command, null, wd);
        BufferedReader stdInput = new BufferedReader(new InputStreamReader(proc.getInputStream()));
        BufferedReader stdError = new BufferedReader(new InputStreamReader(proc.getErrorStream()));
        try {
            StringBuffer stdInputBuffer = drain(stdInput, out);
            StringBuffer stdErrorBuffer = drain(stdError, out);
            out.println("\n\nHere is the standard output:\n\n\n");
            out.println("\n\n" + stdInputBuffer + "\n\n");
            out.println("\n\nHere is the standard error (if any):\n\n\n");
            out.println("\n\n" + stdErrorBuffer + "\n\n");
            out.flush();
        } finally {
            closeQuietly(proc.getOutputStream());
            closeQuietly(stdInput);
            closeQuietly(stdError);
        }
        proc.waitFor();
        return proc;
    }

    /**
     * Reads the stream to its end, collecting every line and printing one empty
     * line per line read so the page keeps flushing while the process runs.
     *
     * <p>NOTE(review): the text as found removed "-->" from each line before
     * printing an empty literal, so the line was presumably once echoed inside
     * markup - confirm against upstream.</p>
     */
    private static StringBuffer drain(BufferedReader reader, PrintWriter out) throws IOException {
        StringBuffer collected = new StringBuffer();
        String line;
        while ((line = reader.readLine()) != null) {
            collected.append(line).append("\n");
            out.println("");
            out.flush();
        }
        return collected;
    }

    /** Closes a reader; a failure to close is deliberately ignored. */
    private static void closeQuietly(Reader reader) {
        try {
            reader.close();
        } catch (IOException ignored) {
            // nothing useful can be done at this point
        }
    }

    /** Closes an output stream; a failure to close is deliberately ignored. */
    private static void closeQuietly(OutputStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // nothing useful can be done at this point
        }
    }

    /**
     * Generates the statistics report through {@code DescribeMessenger}, prints
     * the link text when it reports success and closes the writer.
     *
     * @param out            writer for the response page
     * @param collectionName collection name
     * @param collectionURL  OAI URL of the repository
     * @param collectionHost unused in the text as found
     * @param Prefix         metadata prefix
     */
    private void analyzeCollection(PrintWriter out, String collectionName,
            String collectionURL, String collectionHost, String Prefix) {
        try {
            DescribeMessenger dm = new DescribeMessenger(collectionName, collectionURL);
            out.println("\n\nGenerating statistics and visualisations...\n\n");
            out.flush();
            out.println("\n\nplease wait.\n\n");
            out.flush();
            boolean status = dm.describeMatadata(out, collectionName, collectionURL,
                    Prefix, port_number);
            if (status) {
                out.println("");
                out.println("View the report");
                out.flush();
            }
            out.println("");
            out.flush();
        } catch (Exception e) {
            e.printStackTrace(out);
            out.println("\n\n" + e.toString() + "\n\n");
        }
        out.close();
    }

    /** Reports whether the {@code os.name} system property mentions Windows. */
    private static boolean isWindows() {
        return System.getProperty("os.name").toLowerCase().indexOf("windows") != -1;
    }

    /**
     * Returns the parent of {@code gsdl3Home} as computed by
     * {@code GSPath.removeLastLink}. On Windows the separators are switched to
     * '/' for that call and switched back afterwards, in {@code gsdl3Home} too.
     */
    private String resolveGsdl3Root() {
        boolean windows = isWindows();
        if (windows) {
            gsdl3Home = gsdl3Home.replaceAll("\\\\", "/");
        }
        String gsdl3Root = GSPath.removeLastLink(gsdl3Home);
        if (windows) {
            gsdl3Root = gsdl3Root.replaceAll("/", "\\\\");
            gsdl3Home = gsdl3Home.replaceAll("/", "\\\\");
        }
        return gsdl3Root;
    }

    /**
     * Reloads the per-request settings: the Greenstone home, the page fragments
     * and proxy settings from {@code ext/mat/properties.xml}, and the local host
     * name. Failures are logged and leave the affected fields unchanged.
     */
    private void loadRuntimeSettings() {
        fileSeparator = File.separator;
        // Create the properties object before it is used.
        globalProperty = new GlobalProperties();
        gsdl3Home = globalProperty.getGSDL3Home();
        maxRecord = "10";
        String gsdl3Root = resolveGsdl3Root();

        FileInputStream fis = null;
        try {
            Properties prop = new Properties();
            // NOTE(review): the file is named .xml but is read with Properties.load,
            // which expects the plain key=value format - confirm the file's format.
            fis = new FileInputStream(gsdl3Root + fileSeparator + "ext" + fileSeparator
                    + "mat" + fileSeparator + "properties.xml");
            prop.load(fis);
            titleString = "" + prop.getProperty("Servlet.Title") + "";
            h1String = "\n\n" + prop.getProperty("Servlet.Head") + "\n\n";
            cssString = "";
            javaScript = "";
            headerString2 = "" + titleString + "\n" + javaScript + cssString + "\n";
            headerString = "" + titleString + "\n" + cssString + "\n";
            logoURL = prop.getProperty("Servlet.Logo");
            description = prop.getProperty("Servlet.Description");
            // Absent proxy keys mean "no proxy"; they must not become null because
            // the callers compare these fields with equals("").
            proxyHostContent = prop.getProperty("Servlet.proxyHost", "");
            proxyPortContent = prop.getProperty("Servlet.proxyPort", "");
            proxyUserName = prop.getProperty("Servlet.proxyUserName", "");
            proxyUserPassword = prop.getProperty("Servlet.proxyUserPassword", "");
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ignored) {
                    // nothing useful can be done at this point
                }
            }
        }

        try {
            java.net.InetAddress localMachine = java.net.InetAddress.getLocalHost();
            hostName = localMachine.getHostName();
        } catch (java.net.UnknownHostException uhe) {
            uhe.printStackTrace();
        }
    }
}

/** Supplies a fixed user name and password whenever credentials are requested. */
class ProxyAuthenticator extends Authenticator {

    private String username;
    private char[] password;

    /**
     * @param username user name to present
     * @param password password to present
     */
    public ProxyAuthenticator(String username, String password) {
        this.username = username;
        this.password = password.toCharArray();
    }

    /** Returns the credentials given at construction. */
    protected PasswordAuthentication getPasswordAuthentication() {
        return new PasswordAuthentication(username, password);
    }
}