source: other-projects/trunk/greenstone3-extension/mat/src/java/org/greenstone/gsdl3_extension/mat/servlet/MatServlet.java@ 17365

Last change on this file since 17365 was 17365, checked in by cc108, 16 years ago

Updating Mat Source Code

File size: 30.7 KB
Line 
1package org.greenstone.gsdl3_extension.mat.servlet;
2
3import java.io.*;
4import java.net.*;
5import java.util.*;
6import java.util.regex.Matcher;
7import java.util.regex.Pattern;
8
9import javax.servlet.*;
10import javax.servlet.http.*;
11import javax.xml.parsers.*;
12
13import org.xml.sax.*;
14import org.w3c.dom.*;
15
16public class MatServlet extends HttpServlet {
17
// Local port of the servlet container. Starts at 0 and is overwritten from
// request.getLocalPort() on every doGet/doPost call; other methods read it to
// build "http://localhost:<port>/..." URLs.
// NOTE(review): this is per-request data kept in an instance field; if the
// container shares one servlet instance between threads this is a race — confirm.
private int port_number = 0;
// Overwritten in doPost with the "metadataPrefix" request parameter.
private String oaiPrefix ="";
// <title> element placed inside the <head> of every generated page.
private String titleString = "<title>Metadata Analysis Tool - Alpha</title>";
// Heading printed at the top of every generated page.
private String h1String = "<h2>Metadata Analysis Tool - alpha 2</h2>";
// Fallback record limit used by doPost when the "maxrecords" parameter is not
// a positive number of 1-5 digits.
private String maxRecord ="10";
23
24 public void doGet(HttpServletRequest request,HttpServletResponse response)throws ServletException, IOException {
25
26 PrintWriter out = response.getWriter();
27 String cssString = "<link rel=\"stylesheet\" href=\"http://localhost:"+port_number+"/greenstone3/mat/script/mat.css\" type=\"text/css\" >";
28 String headerString = "<head>" + titleString + "\n" + cssString + "</head>\n";
29
30 port_number = request.getLocalPort();
31 response.setContentType("text/html");
32 response.setHeader("pragma", "no-cache");
33
34 out.println("<html>");
35 out.println(headerString);
36 out.println("<body>");
37 out.println("<img src=http://www.cs.waikato.ac.nz/images-cs/uni.gif>");
38 out.println(h1String);
39 out.println("<p>This tool will generate statistics and visualisations of OAI repositories</p>");
40 out.println("<p>Enter the URL of the OAI repository to analyse, e.g.:</p>");
41 out.println( "<p><code>http://www.ideals.uiuc.edu/dspace-oai/request</code></p>");
42 out.println("<div><form method=\"post\" action=\"http://localhost:"+port_number+"/greenstone3/mat\">");
43 out.println("<p>OAI URL: <input type=\"text\" name=\"oaiurl\" size=\"140\"></p>");
44 out.println("<p><input type=\"submit\" value=\"Analyse repository\"></p>");
45 out.println("</form>");
46 out.println("<p>Or use these shortcuts:</p>");
47
48 out.print("<table><tr>");
49 out.print("<div><form method=\"post\" action=\"http://localhost:"+port_number+"/greenstone3/mat\">");
50 out.print("<input type=\"hidden\" name=\"oaiurl\" value=\"http://www.ideals.uiuc.edu/dspace-oai/request\"> ");
51 out.print("<input type=\"submit\" value=\"IDEALS at U. Illinois\"></form>");
52
53 out.print("<form method=\"post\" action=\"http://localhost:"+port_number+"/greenstone3/mat\">");
54 out.print("<input type=\"hidden\" name=\"oaiurl\" value=\"http://dspace.lib.cranfield.ac.uk/dspace-oai/request\"> ");
55 out.print( "<input type=\"submit\" value=\"QUEprints (DSpace) at Cranfield U.\"></form></div>");
56
57 out.print("<form method=\"post\" action=\"http://localhost:"+port_number+"/greenstone3/mat\">" +
58 "<input type=\"hidden\" name=\"oaiurl\" value=\"http://cogprints.org/cgi/oai2\"> " +
59 "<input type=\"submit\" value=\"Cogprints - Cognitive Science Eprint Archive\"></form>");
60
61 out.print("<form method=\"post\" action=\"http://localhost:"+port_number+"/greenstone3/mat\">" +
62 "<input type=\"hidden\" name=\"oaiurl\" value=\"http://azmemory.lib.az.us/cgi-bin/oai.exe\"> " +
63 "<input type=\"submit\" value=\"Arizona Memory Project\"></form>");
64
65 out.println("</div>");
66 out.print("</tr></table>");
67
68 out.println ("<table border=\"0\" align=\"center\">\r\n");
69 out.println ("<colgroup width=\"900\" span=\"3\">\r\n");
70
71 out.println ("<tr>\r\n");
72 out.println ("<td><b>Sample Reports</b>\r\n");
73 out.println ("<td><b>NZ Reports</b>\r\n");
74 out.println ("<td><b>NZ Reports</b>\r\n");
75 out.println ("</tr>\r\n");
76
77 out.println ("<tr>\r\n");
78 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/gzzraup/Overall.html\">Cogprints,100 records</a>\r\n");
79 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/cewmfnn/Overall.html\">The University of Auckland,1960 records</a>\r\n");
80 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/pitnyys/Overall.html\">Christchurch Polytechnic Institute of Technology,2 records</a>\r\n");
81 out.println ("</tr>\r\n");
82
83 out.println ("<tr>\r\n");
84 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/pberglz/Overall.html\">IDEALS @ UIUC,500 records</a>\r\n");
85 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/pljncki/Overall.html\">Auckland University of Technology,320 records</a>\r\n");
86 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/detbosl/Overall.html\">Manukau Institute of Technology,13 records</a>\r\n");
87 out.println ("</tr>\r\n");
88
89 out.println ("<tr>\r\n");
90 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/upqzqen/Overall.html\">NZ research,4600 records</a>\r\n");
91 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/ovdikqt/Overall.html\">University of Canterbury,640 records</a>\r\n");
92 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/cderfvb/Overall.html\">NorthTec,19 records</a>\r\n");
93 out.println ("</tr>\r\n");
94
95 out.println ("<tr>\r\n");
96 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/nwjrwew/Overall.html\">ResearchBank,6000 records</a>\r\n");
97 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/ixrwnqs/Overall.html\">Lincoln University,430 records</a>\r\n");
98 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/polkiuj/Overall.html\">Open Polytechnic of New Zealand,14 records</a>\r\n");
99 out.println ("</tr>\r\n");
100
101 out.println ("<tr>\r\n");
102 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/cfxnxcd/Overall.html\">MINDS @ UW,6000 records</a>\r\n");
103 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/icsqvwj/Overall.html\">Massey University,290 records</a>\r\n");
104 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/qwertyu/Overall.html\">Unitec New Zealand,55 records</a>\r\n");
105 out.println ("</tr>\r\n");
106
107 out.println ("<tr>\r\n");
108 out.println ("<td>&nbsp;\r\n");
109 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/kengnzg/Overall.html\">University of Otago,670 records</a>\r\n");
110 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/gbnhyuj/Overall.html\">Universal College of Learning,12 records</a>\r\n");
111 out.println ("</tr>\r\n");
112
113 out.println ("<tr>\r\n");
114 out.println ("<td>&nbsp;\r\n");
115 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/ylxiywr/Overall.html\">Victoria University of Wellington,220 records</a>\r\n");
116 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/poiuytr/Overall.html\">Whitireia Community Polytechnic,59 records</a>\r\n");
117 out.println ("</tr>\r\n");
118
119 out.println ("<tr>\r\n");
120 out.println ("<td>&nbsp;\r\n");
121
122 out.println ("<td><a href=\"http://www.nzdl.org/greenstone3/mat/nwgfril/Overall.html\">University of Waikato,270 records</a>\r\n");
123 out.println ("</tr>\r\n");
124 out.println ("</tr>\r\n</table>");
125
126 out.println("<p style=\"font-size:small;\" align=\"right\">powered by <a href=\"http://www.greenstone.org/greenstone3-home\">Greenstone3</a></p>");
127 out.println("<p align=\"left\">Comments to <a href=\"http://www.cs.waikato.ac.nz/~daven/\">Dave Nichols</a></p>");
128 out.println ("</body>");
129 out.println ("</html>");
130 out.close();
131 }
132
133 protected void doPost(HttpServletRequest req, HttpServletResponse res)throws ServletException, IOException {
134
135 String cssString = "<link rel=\"stylesheet\" href=\"http://localhost:"+port_number+"/greenstone3/mat/script/mat.css\" type=\"text/css\" >";
136 String headerString = "<head>" + titleString + "\n" + cssString + "</head>\n";
137 String javaScript = "<script type=\"text/javascript\" src=\"http://localhost:"+port_number+"/greenstone3/mat/script/status3.js\"></script>";
138 String headerString2 = "<head>" + titleString + "\n" + javaScript + cssString+"</head>\n";
139
140 port_number = req.getLocalPort();
141 res.setContentType("text/html");
142 res.setHeader("pragma", "no-cache");
143 PrintWriter out = res.getWriter();
144
145 out.println("<html>");
146
147 if ( req.getParameter("metadataPrefix") != null) {
148
149 oaiPrefix = req.getParameter("metadataPrefix");
150 String maxRecords = req.getParameter("maxrecords");
151
152 Pattern pa = Pattern.compile("[0-9]{1,5}");
153 Matcher ma = pa.matcher(maxRecords);
154
155 out.println(headerString2);
156 out.println("<body onLoad=\"autoSubmit()\">");
157 out.println(h1String);
158 out.println("<p>Please wait .... <br> It's downloading OAI records </p>");
159 out.println("<input type=\"button\" onClick=\"showdiv()\" value=\"show debug infomation\">");
160
161 if(ma.matches()){
162 int num = Integer.parseInt(maxRecords);
163 if(num>0){
164 downloadCollection(out, req, res, oaiPrefix, maxRecords);
165 }
166 else{
167 downloadCollection(out, req, res, oaiPrefix, maxRecord);
168 }
169 }
170 else{
171 downloadCollection(out, req, res, oaiPrefix, maxRecord);
172 }
173
174 }
175 else if(req.getParameter("matShell") != null){
176 out.println(headerString2);
177 out.println("<body onLoad=\"autoSubmit2()\">");
178 out.println(h1String);
179 out.println("<p>Please wait ...</p><p>It's building collection now.</p>");
180 out.println("<input type=\"button\" onClick=\"showdiv()\" value=\"show debug infomation\">");
181 buildCollection(out,req.getParameter("matShell"),req.getParameter("collectionName"),req.getParameter("collectionURL"),req.getParameter("oaiPrefix"));
182 }
183
184 else if (req.getParameter("collName") != null){
185 out.println(headerString);
186 out.write("<script type=\"text/javascript\" src=\"http://localhost:"+port_number+"/greenstone3/mat/script/getInfomation.js\"></script>\r\n");
187 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/yahoo/yahoo-min.js\"></script>\r\n");
188 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/event/event-min.js\"></script>\r\n");
189 out.write("<script type=\"text/javascript\" src=\"http://yui.yahooapis.com/2.4.1/build/connection/connection-min.js\"></script>\r\n");
190 out.println("<body>");
191 out.println(h1String);
192 analyzeCollection(out, req.getParameter("collName"),req.getParameter("collURL"),req.getParameter("collHost"),req.getParameter("oaiPrefix"));
193 }
194
195 else {
196 out.println(headerString);
197 out.println("<body>");
198 out.println(h1String);
199
200 java.net.URL oaiURL;
201 String oaiURLString = req.getParameter("oaiurl");
202 //System.out.println("OAIURL:"+oaiURLString);
203
204 if (! (oaiURLString.startsWith("http://"))) {
205 oaiURLString = "http://" + oaiURLString; // add on protocol if missing
206 }
207
208 // URL checks
209 // check if Java can make a URL from the string
210
211 try {
212 oaiURL = new URL(oaiURLString);
213 }
214 catch (MalformedURLException e) {
215 out.println("<p>Malformed URL Exception caught: " + e.getMessage() + "</p>");
216 out.println("<p>The system cannot recognise the URL you have entered.</p>");
217 return; // go no further
218 }
219
220 //out.println("<p>host: " + oaiURL.getHost() + "</p>" );
221 //out.println("<p>protocol: " + oaiURL.getProtocol() + "</p>" );
222
223 // disallow anything with waikato as part of the host
224 // note: this blocks things like waikato.uiuc.edu -
225 // but guess this is highly unlikely to occur
226 // (also turns off the researchcommons)
227 // could use either of these approaches, waikato or waikato.ac.nz
228
229 if ( oaiURL.getHost().indexOf("researchcommons.waikato.ac.nz") == -1 ) { // not the RC
230 /*
231 if((oaiURL.getHost().indexOf("waikato") != -1 ) ||
232 (oaiURL.getHost().indexOf("waikato.ac.nz") != -1 ) ) {
233 out.println("<p>This service cannot be used to access Waikato URLs</p>");
234 return;
235 }*/
236 }
237
238 // need to prevent machine names on their own, e.g. smith
239 // being allowed through
240
241 // approach 1: explicit blacklisting
242
243 if (oaiURL.getHost().equals("smith") ||
244 oaiURL.getHost().equals("wesson") ) {
245 out.println("<p>This service cannot be used to access these URLs</p>");
246 return;
247 }
248
249 // approach 2: require at least one . in the host URL
250 if (oaiURL.getHost().indexOf(".") == -1 ) { // i.e. no . in URL host
251 out.println("<p>This service cannot be used to access URLs of this form.</p>");
252 return;
253 }
254
255 // jones.cs would get through to this point, might this mean something
256 // from the perspective of the host machine?? Does it matter?
257 // will automatic domain completion be applied? by java? by 'the network'?
258 // require 2 dots in the host? does that help at all ?
259 // domain suffix whitelisting is impractical here, as we'd have to
260 // list all countries in the world
261
262 // do an OAI verb=identify check to make sure it is an OAI server
263 // trim the URL back and then add on verb=Identify
264
265
266 //System.setProperty("http.proxyHost", "wwwcache.cs.waikato.ac.nz");
267 //System.setProperty("http.proxyPort","80");
268 //System.setProperty("http.nonProxyHosts", "localhost|*.waikato.ac.nz");
269
270 Properties systemSettings = System.getProperties();
271 systemSettings.put("http.proxyHost", "wwwcache.cs.waikato.ac.nz");
272 systemSettings.put("http.proxyPort", "80");
273
274 URL url = oaiURL;
275 String identifyVerb = "";
276
277 if (! url.getPath().endsWith("?")) {
278 identifyVerb += "?";
279 }
280 identifyVerb += "verb=Identify";
281
282 URL identifyURL = new URL (url.toString() + identifyVerb);
283 HttpURLConnection connection = (HttpURLConnection)identifyURL.openConnection();
284 //URLConnection connection = identifyURL.openConnection();
285 connection.connect();
286
287 //BufferedReader inIdentify = new BufferedReader(new InputStreamReader(
288 //connection.getInputStream()));
289
290 Document identifyDocument;
291
292 try {
293 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
294 DocumentBuilder builder = factory.newDocumentBuilder();
295 identifyDocument = builder.parse( connection.getInputStream() );
296
297 Element oaiElement = identifyDocument.getDocumentElement();
298
299 if (oaiElement.getTagName() == "OAI-PMH" ) {
300 //out.println("<p>OAI-PMH element found...</p>");
301 }
302 else {
303 out.println("<p>Error: OAI-PMH element not found..exiting</p>");
304 return;
305 }
306
307 NodeList identifyNodeList = identifyDocument.getElementsByTagName("Identify");
308 Node identifyNode = null;
309
310 if (identifyNodeList.getLength() == 1 ) {
311 identifyNode = identifyNodeList.item(0);
312 System.out.println("<Identify>\n");
313 }
314 else {
315 out.println("<p>Error: Identify node not found... exiting</p>");
316 return;
317 }
318
319 NodeList identifyChildList = identifyNode.getChildNodes();
320 out.println("<table rules =\"none\">");
321
322 for (int i=0; i < identifyChildList.getLength(); i++) {
323 if (identifyChildList.item(i).getNodeName() == "repositoryName" ) {
324 out.println("<tr><td><strong>Repository Name:</strong></td><td> " + identifyChildList.item(i).getTextContent() + "</td></tr>");
325 }
326 if (identifyChildList.item(i).getNodeName() == "baseURL" ) {
327 String baseURL = identifyChildList.item(i).getTextContent();
328
329 if(baseURL.startsWith("http://")){
330 out.println("<tr><td><strong>Base URL:</strong></td><td><code><a href=\"" + baseURL + "\">" + baseURL + "</a></code></td></tr>");
331 }
332 else{
333 out.println("<tr><td><strong>Base URL:</strong></td><td><code><a href=\"" + oaiURLString + "\">" + oaiURLString + "</a></code></td></tr>");
334 }
335 }
336 }
337 out.println("</table>");
338 }
339 catch (SAXParseException spe) {
340 out.println(spe.getMessage());
341 }
342 catch (SAXException sxe) {
343 out.println(sxe.toString());
344 }
345 catch (ParserConfigurationException pce) {
346 out.println(pce.toString());
347 }
348 catch (IOException ioe) {
349 out.println(ioe.toString());
350 }
351
352 // now get the metadata prefixes
353
354 String metadataFormats = "";
355
356 if (! url.getPath().endsWith("?")) {
357 metadataFormats += "?";
358 }
359 metadataFormats += "verb=ListMetadataFormats";
360
361 URL metadataFormatsURL = new URL (url.toString() + metadataFormats);
362 URLConnection connection2 = metadataFormatsURL.openConnection();
363 connection.connect();
364 Document document2;
365
366 try {
367 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
368 DocumentBuilder builder = factory.newDocumentBuilder();
369 document2 = builder.parse( connection2.getInputStream() );
370
371 NodeList prefixList = document2.getElementsByTagName("metadataPrefix");
372
373 // create UI based on response
374
375 if ( prefixList.getLength() > 0 ) {
376
377 out.println("<p>Choose one metadata prefix to use:</p>");
378 out.println("<form method=\"post\" action=\"http://localhost:"+port_number+"/greenstone3/mat\">");
379 out.println("<table rules =\"none\">");
380
381 for (int i=0; i < prefixList.getLength(); i++) {
382 out.println("<tr><td>");
383 String id = "radioID" + i;
384 String prefix = prefixList.item(i).getTextContent();
385
386 if (prefix.equals("oai_dc")) {
387 out.println("<label for=\"" + id + "\">" + prefix + " (Dublin Core)</label>");
388 //System.out.println("<MetadataElement>Dublin Core<\\MetadataElement>\n");
389 }
390 else {
391 out.println("<label for=\"" + id + "\">" + prefix + "</label>");
392 //System.out.println("<MetadataElement>"+prefix+"<\\MetadataElement>\n");
393 }
394
395 out.print("</td><td><input type=\"radio\" name=\"metadataPrefix\" value=\"" + prefix + "\" ");
396
397 if (prefix.equals("oai_dc")) {
398 out.println("checked=\"checked\" ");
399 }
400 out.println(" id=\"" + id + "\"><br>");
401 out.println("</td></tr>");
402 }
403
404 //System.out.println("</Identify>");
405 out.println("</table>");
406
407 // need to pass oaiurl through as well (again)
408
409 out.println("<input type=\"hidden\" name=\"oaiURL\" value=\"" + url.toString() + "\"><br>");
410 out.println("Max records: <input type=\"text\" name=\"maxrecords\" value=\"500\"><br>");
411
412 // submit button
413
414 out.println("<p><input type=\"submit\" value=\"Continue\"></p>");
415 out.println("</form>");
416
417 out.println("<p> Warning: Generating the statistics and visualization will take some time: </p> ");
418 out.println("<table border = \"1\">");
419 out.println("<tr align=\"right\"><td> No.of Records <td> Estimated Time");
420 out.println("<tr align=\"right\"><td>100<td> 5 minutes");
421 out.println("<tr align=\"right\"><td>500 <td> 10 minutes");
422 out.println("<tr align=\"right\"><td>1000 <td> 18 minutes");
423 out.println("<tr align=\"right\"><td>2000<td> 30 minutes");
424 out.println("</table>");
425
426 out.println("<p> This tool is designed to work with Dublin Core metadata: note that the mapping of qualified Dublin Core to simple Dublin Core (as in <code>oai_dc</code>) may affect the results.");
427 }
428 else {
429 out.println("<p>Error: no metadata prefixes found... exiting</p>");
430 return;
431 }
432 }
433 catch (SAXParseException spe) {
434 out.println(spe.getMessage());
435 }
436 catch (SAXException sxe) {
437 out.println(sxe.toString());
438 }
439 catch (ParserConfigurationException pce) {
440 out.println(pce.toString());
441 }
442 catch (IOException ioe) {
443 out.println(ioe.toString());
444 }
445
446 // check we get a valid XML document back
447 // check OAI-PMH element
448 // check we have a <repositoryName> and <baseURL> elements
449 // maybe check the baseURL against the query we issued?
450
451
452 // if we get here we have a valid non-waikato non-local OAI server
453 // /* process stuff here */
454 // test with University home page - as it doesn't require
455 // dealing with the proxy server
456 // URL url = new URL("http://waikato.ac.nz");
457 // URL url = new URL(oaiURL);
458 // need to go through the proxy here
459 // http://dn.codegear.com/article/29783
460 // http://java.ittoolbox.com/groups/technical-functional/java-l/response-to-proxy-authentication-exception-71438
461 // http://www.jguru.com/faq/view.jsp?EID=13186
462 // http://www.developer.com/java/other/article.php/1551421
463 // System.setProperty("http.proxyHost","http://proxy.scms.waikato.ac.nz");
464 // System.setProperty("http.proxyPort","80");
465 // System.setProperty("http.nonProxyHosts", "localhost|*.waikato.ac.nz");
466 // build a GS 3 collection from the OAI URL
467 // http://www.javaworld.com/javaworld/jw-12-2000/jw-1229-traps.html
468 // http://codon.kribb.re.kr/wiki/display/Java/Using+Runtime.exec+to+invoke+child+process
469
470 out.print("</body></html>");
471 out.close();
472 } // endif
473 } // end doPost
474
475 protected void buildCollection( PrintWriter out, String matShell2, String collName, String oaiURLString, String oaiPrefix) throws ServletException, IOException{
476
477 out.println("<br><br><div id=\"hideshow\" style=\"visibility:hidden\">");
478 out.println("<input type=\"button\" onClick=\"hidediv()\" value=\"hide debug infomation\"/>");
479 out.println("<p>Building collection...</p>");
480
481 String host = "http://localhost:"+port_number+"/greenstone3/Collection_Analysis/";
482
483 out.println("<form method=\"post\" name=\"aForm\">");
484 out.println("<input type=\"hidden\" name=\"collName\" value=\"" + collName + "\"><br>");
485 out.println("<input type=\"hidden\" name=\"collURL\" value=\"" + oaiURLString + "\"><br>");
486 out.println("<input type=\"hidden\" name=\"collHost\" value=\"" + host + "\"><br>");
487 out.println("<input type=\"hidden\" name=\"oaiPrefix\" value=\"" + oaiPrefix + "\"><br>");
488 out.println("</form>");
489 out.println("<p><pre>" + matShell2 + "</pre></p>");
490 out.flush();
491
492 Process p2 = processShell(matShell2, out);
493
494 if (p2.exitValue() == 0){
495 out.println("<p>Collection built.</p>");
496 }
497 else{
498 out.println("<p><b>Collection not built.</b></p>");
499 }
500
501 out.print("</div></body></html>");
502 p2 = null;
503 p2.destroy();
504 out.close();
505 }
506
507 private void downloadCollection( PrintWriter out, HttpServletRequest req, HttpServletResponse res, String oaiPrefix, String Records) throws ServletException, IOException
508 {
509 String oaiURLString = req.getParameter("oaiURL");
510 String metadataprefix = req.getParameter("metadataPrefix");
511 String collName = generateCollName();
512 String host = "http://localhost:"+port_number+"/";
513
514 out.println("<br><br><div id=\"hideshow\" style=\"visibility:hidden\">");
515 out.println("<input type=\"button\" onClick=\"hidediv()\" value=\"hide debug infomation\"/>");
516 out.println("<p>Downloading OAI documents...</p>");
517 out.flush();
518 // move to correct directory
519
520 String maxRecords = Records;
521 String cacheDir = "/tmp/oai-mat-5/"+ collName;
522
523 // downloading
524 String gs3Root = "/research/cc108/greenstone3Project";
525 String collectDir = gs3Root + "/web/sites/localsite/collect";
526 String logFile = "/tmp/mat3.txt";
527
528 String matShell = "source /research/cc108/greenstone3Project/gs2build/bin/script/mat-colbuild-download.bash "
529 + collName
530 + " "
531 + oaiURLString
532 + " "
533 + cacheDir
534 + " "
535 + maxRecords
536 + " "
537 + metadataprefix
538 + " "
539 + gs3Root
540 + " "
541 + collectDir
542 + " "
543 + logFile;
544
545 out.println("<p><pre>" + matShell + "</pre></p>");
546 out.flush();
547 Process p = processShell(matShell, out);
548
549 if (p.exitValue() == 0){
550 out.println("<p>Collection downloaded.</p>");
551 }
552 else {
553 out.println("<p><b>Collection not downloaded properly.</b></p>");
554 }
555 out.flush();
556 //p = null;
557 p.destroy();
558 // return the web page
559 //out.println(req.getRequestURL());
560 // building
561
562 String matShell2 = "source /research/cc108/greenstone3Project/gs2build/bin/script/mat-colbuild.bash "
563 + collName
564 + " "
565 + oaiURLString
566 + " "
567 + cacheDir
568 + " "
569 + maxRecords
570 + " "
571 + metadataprefix
572 + " "
573 + gs3Root
574 + " "
575 + collectDir
576 + " "
577 + logFile;
578
579 out.println("<form method=\"post\" name=\"collectionForm\">");
580 out.println("<input type=\"hidden\" name=\"matShell\" value=\"" + matShell2 + "\">");
581 out.println("<input type=\"hidden\" name=\"collectionName\" value=\"" + collName + "\">");
582 out.println("<input type=\"hidden\" name=\"collectionURL\" value=\"" + oaiURLString + "\">");
583 out.println("<input type=\"hidden\" name=\"oaiPrefix\" value=\"" + oaiPrefix + "\">");
584 out.println("<input type=\"hidden\" name=\"host\" value=\"" + host + "\">");
585 //out.println("<input type=\"submit\" value=\"start\"><br>");
586 out.println("</form>");
587 out.println("</div>");
588 out.print("</body></html>");
589 out.flush();
590 out.close();
591
592 // return page
593 //javascript to submit button in 5 secs
594 //form
595 // hidden inputs
596 // collname
597 // host
598 /////////////////////////////////////////////////////////////////////////////////////////////
599 ////////////////////////////////////////////////////////////////////////////////////////////
600
601 // specify plugins as parameter to mkcol.pl
602 //String makeColl = "perl -S mkcol.pl -creator [email protected] " + collName;
603 //out.println("<p><pre>" + makeColl + "</pre></p>");
604 // specify the OAI server in a config file???
605 // downloadfrom.pl -download_mode OAI -cache_dir -gli -url oaiURL.toString() -get_doc no
606 // what about that 500 doc limit in OAIDownload.pm
607 /*
608 String downloadFrom = "downloadfrom.pl -download_mode OAI -cache_dir "
609 + cacheDir
610 + " -url "
611 + oaiURL.toString()
612 + " -max_records 10"
613 + " -proxy_on"
614 + " -proxy_host "
615 + proxyHost
616 + " -proxy_port "
617 + proxyPort;
618 */
619
620 // -proxy_on -proxy_host wwwcache.cs.waikato.ac.nz -proxy_port 80
621 // out.println("<p><pre>" + downloadFrom + "</pre></p>");
622 // /home/daven/research/greenstone3/gs2build/bin/script/build [options] collection-name
623
624 /*
625 String build = "build -indextype lucene -download file://"
626 + cacheDir
627 + " -log_events " //event log goes to greenstone3/gs2build/etc/events.txt
628 + collName;
629 */
630
631 // out.println("<p><pre>" + build + "</pre></p>");
632 // perl -S importfrom.pl collname
633 // String importFrom = " perl -S importfrom.pl " + collName;
634 // perl -S buildcol.pl collname
635 // String buildColl = "perl -S buildcol.pl " + collName;
636 // need to be in correct directory
637 // String changeDir = "cd $GSDLHOME/collect/" + collName;
638 // build.pl ??
639 // rebuild ?
640 // rm -r index/*
641 // String removeOld = "rm -r index/*";
642 // mv building/* index/
643 // String moveToIndex = "mv building/* index/";
644
645 //////////////////////////////////////
646 /*
647 URLConnection connection = url.openConnection();
648 connection.connect();
649 Map headerMap = connection.getHeaderFields(); // gets the HTTP headers
650
651 out.print("<html><head><title>Analysing...</title></head><body>");
652 out.println ("<img src=http://www.cs.waikato.ac.nz/images-cs/uni.gif>");
653 out.println ("<h1>Metadata Analysis Tool - Alpha</h1>");
654 out.println("<p>Analysing the OAI URL: <code>" + url.toString() + "</code></p>");
655 out.println("<code><pre>");
656
657 Iterator keyValuePairs = headerMap.entrySet().iterator();
658 out.println("size = " + headerMap.size());
659 for (int i = 0; i < headerMap.size(); i++) {
660 out.println("i = " + i);
661 Map.Entry entry = (Map.Entry) keyValuePairs.next();
662 out.println(entry.getKey());
663 out.println(entry.getValue());
664 out.println();
665 }
666
667 out.println(headerMap.toString());
668 out.println("</pre></code>");
669 */
670
671 /*
672 // get the source HTML and insert it into the page - messy
673 out.println("<p>HTML source:</p>");
674 out.println("<pre><code>");
675 BufferedReader in = new BufferedReader(new InputStreamReader(
676 connection.getInputStream()));
677 String inputLine;
678 while ((inputLine = in.readLine()) != null)
679 out.println(inputLine);
680 in.close();
681 out.println("</pre></code>");
682 */
683
684 //out.print("</body></html>");
685
686}
687
688 /* produce a random 7 letter collection name */
689
690 private String generateCollName () {
691 Random random = new Random();
692 StringBuffer message = new StringBuffer();
693 int offset = 97; // = "a"
694 message.append( (char) ( random.nextInt( 26 ) + offset ) );
695 message.append( (char) ( random.nextInt( 26 ) + offset ) );
696 message.append( (char) ( random.nextInt( 26 ) + offset ) );
697 message.append( (char) ( random.nextInt( 26 ) + offset ) );
698 message.append( (char) ( random.nextInt( 26 ) + offset ) );
699 message.append( (char) ( random.nextInt( 26 ) + offset ) );
700 message.append( (char) ( random.nextInt( 26 ) + offset ) );
701 return message.toString();
702 }
703
704 private Process processShell( String command, PrintWriter out) {
705
706 String s= "";
707 try {
708
709 String[] args = new String[]{"sh", "-c", command};
710 Process p = Runtime.getRuntime().exec(args);
711 BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
712 BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
713 StringBuffer stdInputBuffer = new StringBuffer();
714
715 while ((s = stdInput.readLine()) != null) {
716 stdInputBuffer.append(s+"\n");
717 if(s.contains("-->")){
718 s = s.replace("-->", "");
719 out.println("<!-- " + s + " -->");
720 }
721 else{
722 out.println("<!-- " + s + " -->");
723 }
724 out.flush();
725 }
726
727 StringBuffer stdErrorBuffer = new StringBuffer();
728
729 while ((s = stdError.readLine()) != null) {
730 stdErrorBuffer.append(s+"\n");
731
732 if(s.contains("-->")){
733 s = s.replace("-->", "");
734 out.println("<!-- " + s + " -->");
735 }
736 else{
737 out.println("<!-- " + s + " -->");
738 }
739 out.flush();
740 }
741
742 out.println("<p>Here is the standard output:</p>\n");
743 out.println("<p><pre>" + stdInputBuffer + "</pre></p>");
744 out.println("<p>Here is the standard error (if any):</p>\n");
745 out.println("<p><pre>" + stdErrorBuffer + "</pre></p>");
746 out.flush();
747 //if (p.exitValue() != 0)
748 if (false){
749 out.println("<p>An error occurred while building the collection.</p>");
750 out.println("<p>Here is the standard output:</p>\n");
751 out.println("<p><pre>" + stdInputBuffer + "</pre></p>");
752 out.println("<p>Here is the standard error (if any):</p>\n");
753 out.println("<p><pre>" + stdErrorBuffer + "</pre></p>");
754 }
755
756 InputStream is = p.getInputStream();
757 is.close();
758
759 OutputStream os = p.getOutputStream();
760 os.close();
761
762 InputStream es = p.getErrorStream();
763 es.close();
764
765 stdInput.close();
766 stdError.close();
767
768 return p;
769 }
770 catch (IOException e) {
771 out.println("exception happened - here's what I know: ");
772 out.println(e.toString());
773 out.flush();
774 }
775 return null;
776 }
777
778 private void analyzeCollection(PrintWriter out, String collectionName,String collectionURL,String collectionHost, String Prefix){
779
780 String collName = collectionName;
781 String oaiURLString = collectionURL;
782 String host = collectionHost;
783
784 try{
785 DescribeMessager dm = new DescribeMessager(collName,oaiURLString);
786 out.println("<p>Generating statistics and visualisations...</p>");
787 out.flush();
788 out.println("<p>please wait.</p>");
789 out.flush();
790 boolean status = dm.describeMatadata(out,collName,oaiURLString,Prefix);
791
792 if(status){
793 out.println("<script type=\"text/javascript\" language=\"JavaScript\">");
794 out.println("reconfig();");
795 out.println("</script>");
796 out.println("<a href=\""+host+collName+"/Overall.html\">View the report</a>");
797 }
798 out.println("</body></html>");
799 }catch(Exception e){e.printStackTrace(out);
800 out.println("<p><pre>"+e.toString()+"</pre></p>");
801 }
802 out.close();
803 }
804}
Note: See TracBrowser for help on using the repository browser.