source: other-projects/FileTransfer-WebSocketPair/testGXTWithGreenstone/src/org/greenstone/gatherer/download/DownloadJob.java@ 33053

Last change on this file since 33053 was 33053, checked in by ak19, 5 years ago

I still had some stuff of Nathan Kelly's (FileTransfer-WebSocketPair) sitting on my USB. Had already committed the Themes folder at the time, 2 years back. Not sure if he wanted this additional folder committed. But I didn't want to delete it and decided it will be better off on SVN. When we use his project, if we find we didn't need this test folder, we can remove it from svn then.

File size: 22.4 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.download;
38
39import java.awt.event.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.tree.*;
44import org.greenstone.gatherer.Configuration;
45import org.greenstone.gatherer.DebugStream;
46import org.greenstone.gatherer.Dictionary;
47import org.greenstone.gatherer.Gatherer;
48import org.greenstone.gatherer.GAuthenticator;
49import org.greenstone.gatherer.greenstone.LocalGreenstone;
50import org.greenstone.gatherer.file.WorkspaceTree;
51import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
52import org.greenstone.gatherer.util.Utility;
53import org.greenstone.gatherer.cdm.Argument;
54import org.greenstone.gatherer.collection.*;
55/**
56 * @author John Thompson, Greenstone Digital Library, University of Waikato
57 * @version 2.0
58 */
59public class DownloadJob
60 implements ActionListener {
61
62 private boolean debug;
63 private boolean higher_directories;
64 private boolean no_parents;
65 private boolean other_hosts;
66 private boolean page_requisites;
67 private boolean quiet;
68
69 private AppendLineOnlyFileDocument download_log;
70
71 private DownloadProgressBar progress;
72
73 private int depth;
74 private int previous_state;
75 private int state;
76
77 private String download_url = "";
78
79 // private String current_url;
80 // private String destination;
81 private String proxy_pass;
82 private String proxy_user;
83
84 private Vector encountered_urls;
85 private Vector failed_urls;
86 private Download download;
87 private DownloadScrollPane mummy;
88 private HashMap download_option;
89
90 public static int COMPLETE = 0;
91 public static int PAUSED = 1;
92 public static int RUNNING = 2;
93 public static int STOPPED = 3;
94
95 public static int UNKNOWN_MAX = 0;
96 public static int DEFINED_MAX = 1;
97 public static int UNDEFINED_MAX = 2;
98
99 // To prematurely terminate wget, we will need to use sockets and find a free port.
100 // We will look at a limited range of ports. This range will be reused (circular buffer)
101 private static final int PORT_BASE = 50000;
102 private static final int PORT_BLOCK_SIZE = 100;
103 private static int nextFreePort = PORT_BASE; // Keep track what port numbers we have checked for availability
104 int port; // package access. The socket port number this instance of DownloadJob will use
105
106 private String mode = null;
107
108 private String proxy_url;
109
110 /**
111 */
112 public DownloadJob(Download download, String proxy_pass, String proxy_user, DownloadScrollPane mummy, String mode, String proxy_url) {
113 URL url = null;
114 int folder_hash;
115
116 this.proxy_url = proxy_url;
117
118 download_option = downloadToHashMap(download);
119 if (!mode.equals("Z3950") && !mode.equals("SRW")) {
120 Argument url_arg = (Argument)download_option.get((String)"url");
121 download_url = url_arg.getValue();
122
123 }
124 else {
125 Argument host_arg = (Argument)download_option.get((String)"host");
126 Argument port_arg = (Argument)download_option.get((String)"port");
127 download_url = host_arg.getValue() + ":" +port_arg.getValue();
128 }
129
130 folder_hash = download_url.hashCode();
131 String log_filename = Utility.getLogDir(null) + "download-"+ mode + folder_hash + ".log";
132 File log_file = new File(log_filename);
133 if(log_file.exists()) {
134 log_file.delete();
135 }
136
137 File parent_log_file = log_file.getParentFile();
138 parent_log_file.mkdirs();
139 parent_log_file = null;
140 log_file = null;
141
142 this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
143
144 this.proxy_pass = proxy_pass;
145 this.proxy_user = proxy_user;
146 this.mummy = mummy;
147 this.mode = mode;
148 this.download = download;
149
150 progress = new DownloadProgressBar(this,download_url, true);
151 encountered_urls = new Vector();
152 failed_urls = new Vector();
153
154 previous_state = STOPPED;
155 state = STOPPED;
156 }
157
158 private HashMap downloadToHashMap(Download download)
159 {
160 HashMap download_option = new HashMap();
161 ArrayList arguments = download.getArguments(true, false);
162 for(int i = 0; i < arguments.size(); i++) {
163 Argument argument = (Argument) arguments.get(i);
164 download_option.put(argument.getName(), argument);
165 }
166 return download_option;
167 }
168
169 /** Depending on which button on the progress bar was pushed,
170 * this method will affect the state of the DownloadJob and perhaps make
171 * calls to wget.class if necessary.
172 * @param event The ActionEvent fired from within the DownloadProgressBar
173 * which we must respond to.
174 */
175 public void actionPerformed(ActionEvent event) {
176 // The stop_start_button is used to alternately start or stop the
177 // job. If the current state of the job is paused then this
178 // restart is logically equivalent to a resume.
179 if(event.getSource() == progress.stop_start_button) {
180 previous_state = state;
181 if (state == RUNNING) {
182 state = STOPPED;
183 } else {
184 //previous_state = state;
185 state = RUNNING;
186 mummy.resumeThread();
187 }
188 }
189 else if (event.getSource() == progress.close_button) {
190 if(state == RUNNING) {
191 previous_state = state;
192 state = STOPPED; // do we need to do anything else to stop this?
193 }
194 mummy.deleteDownloadJob(this);
195 }
196 }
197
198 /** Given a portnumber to check, returns true if it is available
199 * (if nothing's listening there already). */
200 public static boolean isPortAvailable(int portnum) {
201 Socket tmpSocket = null;
202 try {
203 tmpSocket = new Socket("localhost", portnum);
204 tmpSocket.close();
205 return false;
206
207 } catch(ConnectException ex){
208 // "Signals that an error occurred while attempting to connect a socket
209 // to a remote address and port. Typically, the connection was refused
210 // remotely (e.g., no process is listening on the remote address/port)."
211 System.err.println("Port " + portnum + " not yet in use.");
212 tmpSocket = null;
213 return true;
214
215 } catch(Exception ex) {
216 // includes BindException "Signals that an error occurred while attempting
217 // to bind a socket to a local address and port. Typically, the port is in
218 // use, or the requested local address could not be assigned."
219 tmpSocket = null;
220 return false;
221 }
222 }
223
224 /** Circular buffer. Modifies the value of nextFreePort (the buffer index). */
225 private void incrementNextFreePort() {
226 int offset = nextFreePort - PORT_BASE;
227 offset = (offset + 1) % PORT_BLOCK_SIZE;
228 nextFreePort = PORT_BASE + offset;
229 }
230
231 public void callDownload() {
232
233 ArrayList command_list = new ArrayList();
234
235 // the following also works for client-gli if downloading is enabled (when there's a gs2build directory inside gli)
236 command_list.add(Configuration.perl_path);
237 command_list.add("-S");
238 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
239 command_list.add("-download_mode");
240 command_list.add(mode);
241 command_list.add("-cache_dir");
242 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
243 // For the purposes of prematurely terminating wget from GLI (which creates a socket
244 // as a communication channel between GLI and Perl), it is important to tell the script
245 // that we're running as GLI. Because when running from the command prompt, it should
246 // not create this socket and do the related processing.
247 command_list.add("-gli");
248
249 ArrayList all_arg = download.getArguments(true,false);
250 for(int i = 0; i < all_arg.size(); i++) {
251 Argument argument = (Argument) all_arg.get(i);
252 if(argument.isAssigned()) {
253 command_list.add("-" + argument.getName());
254 if(argument.getType() != Argument.FLAG) {
255 command_list.add(argument.getValue());
256 }
257 }
258 }
259
260 String [] cmd = (String []) command_list.toArray(new String[0]);
261 DebugStream.println("Download job, "+command_list);
262
263 if (previous_state == DownloadJob.COMPLETE) {
264 progress.mirrorBegun(true, true);
265 }
266 else {
267 progress.mirrorBegun(false, true);
268 }
269
270 try {
271 Runtime rt = Runtime.getRuntime();
272
273 String [] env = null;
274
275 Process prcs = null;
276
277
278 if (Utility.isWindows()) {
279 prcs = rt.exec(cmd);
280 }
281 else {
282 if (proxy_url != null && !proxy_url.equals("")) {
283 // Specify proxies as environment variables
284 // Need to manually specify GSDLHOME and GSDLOS also
285 env = new String[4];
286 proxy_url = proxy_url.replaceAll("http://","");
287 env[0] = "http_proxy=http://"+proxy_url;
288 env[1] = "ftp_proxy=ftp://"+proxy_url;
289 env[2] = "GSDLHOME=" + Configuration.gsdl_path;
290 env[3] = "GSDLOS=" + Gatherer.client_operating_system;
291 prcs = rt.exec(cmd, env);
292 }
293 else if(Gatherer.isGsdlRemote && Gatherer.isDownloadEnabled) {
294 // Not Windows, but running client with download panel
295 // Need to manually specify GSDLHOME and GSDLOS
296 env = new String[2];
297 env[0] = "GSDLHOME=" + Configuration.gsdl_path;
298 env[1] = "GSDLOS=" + Gatherer.client_operating_system;
299 prcs = rt.exec(cmd, env);
300 }
301 else {
302 // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set
303 prcs = rt.exec(cmd);
304 }
305 }
306 //System.out.println(newcmd);
307
308 // Can use the following if debugging WgetDownload.pm - Reads debug stmts from the perl process' STDIN stream
309 //(new PerlReaderThread(prcs)).start();
310
311 InputStream is = prcs.getInputStream();
312 BufferedReader reader = new BufferedReader(new InputStreamReader(is));
313
314 // To be able to stop Wget, we use sockets to communicate with the perl process that launched wget
315 if (mode.equals("Web") || mode.equals("MediaWiki")) { // wget download modes other than OAI
316
317 // Need to find an available (unused) port within the range we're looking for to pass it
318 // the Perl child process, so that it may set up a listening ServerSocket at that port number
319 try {
320 boolean foundFreePort = false;
321 for(int i = 0; i < PORT_BLOCK_SIZE; i++) {
322
323 if(isPortAvailable(nextFreePort)) {
324 foundFreePort = true;
325 break;
326
327 } else {
328 incrementNextFreePort();
329 }
330 }
331
332 if(foundFreePort) {
333 // Free port number currently found becomes the port number of the socket that this
334 // DownloadJob instance will be connecting to when the user wants to prematurely stop Wget.
335 this.port = nextFreePort;
336 incrementNextFreePort();
337
338 } else {
339 throw new Exception("Cannot find an available port in the range "
340 + PORT_BASE + "-" + (PORT_BASE+PORT_BLOCK_SIZE)
341 + "\nwhich is necessary for forcibly terminating wget.");
342 }
343
344 // Communicate the chosen port for this DownloadJob instance to the perl process, so
345 // that it can set up a ServerSocket at that port to listen for any signal to terminate wget
346 OutputStream os = prcs.getOutputStream();
347 String p = ""+this.port+"\n";
348 System.err.println("Portnumber found: " + p);
349
350 os.write(p.getBytes());
351 os.close();
352
353 } catch(Exception ex) {
354 System.err.println("Sent available portnumber " + this.port + " to process' outputstream.\nBut got exception: " + ex);
355 }
356 }
357
358 BufferedReader br = new BufferedReader(new InputStreamReader(prcs.getErrorStream()));
359 // Capture the standard error stream and search for two particular occurrences.
360 String line="";
361 boolean ignore_for_robots = false;
362 int max_download = DownloadJob.UNKNOWN_MAX;
363
364 while ((line = br.readLine()) != null && !line.trim().equals("<<Finished>>") && state != STOPPED) {
365 if ( max_download == DownloadJob.UNKNOWN_MAX) {
366 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
367 max_download = DownloadJob.DEFINED_MAX;
368 }
369 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
370 max_download = DownloadJob.UNDEFINED_MAX;
371 }
372 }
373 else if(max_download == DownloadJob.UNDEFINED_MAX) {
374 DebugStream.println(line);
375 download_log.appendLine(line);
376 // The first magic special test is to see if we've just
377 // asked for the robots.txt file. If so we ignore
378 // the next add and then the next complete/error.
379 if(line.lastIndexOf("robots.txt;") != -1) {
380 DebugStream.println("***** Requesting robot.txt");
381 ignore_for_robots = true;
382 }
383 // If line contains "=> `" display text as the
384 // currently downloading url. Unique to add download.
385 else if(line.lastIndexOf("=> `") != -1) {
386 if(!ignore_for_robots) {
387 // Add download
388 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
389 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
390 }
391 }
392 // If line contains "/s) - `" set currently
393 // downloading url to "Download Complete".
394 else if(line.lastIndexOf("/s) - `") != -1) {
395 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
396 if(!ignore_for_robots) {
397 DebugStream.println("Not ignore for robots");
398 // Download complete
399 downloadComplete(current_file_downloading);
400 }
401 else {
402 DebugStream.println("Ignore for robots");
403 ignore_for_robots = false;
404 }
405 }
406 // The already there line begins "File `..." However this
407 // is only true in english, so instead I looked and there
408 // are few (if any at all) other messages than those above
409 // and not overwriting messages that use " `" so we'll
410 // look for that. Note this method is not guarenteed to be
411 // unique like the previous two.
412 else if(line.lastIndexOf(" `") != -1) {
413 // Not Overwriting
414 DebugStream.println("Already there.");
415 String new_url =
416 line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
417 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
418 downloadWarning();
419 }
420 // Any other important message starts with the time in the form hh:mm:ss
421 else if(line.length() > 7) {
422 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
423 if(!ignore_for_robots) {
424 DebugStream.println("Error.");
425 downloadFailed();
426 }
427 else {
428 ignore_for_robots = false;
429 }
430 }
431 }
432 }
433 else if (max_download == DownloadJob.DEFINED_MAX) {
434 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
435 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
436 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
437 progress.resetFileCount();
438 progress.addDownload("files"); // for display: "Downloading files"
439 }
440 else if (line.lastIndexOf("<<Done>>") != -1) {
441 progress.increaseFileCount();
442 }
443 else if(line.lastIndexOf("<<Done:") != -1) {
444 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
445 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
446 }
447
448 DebugStream.println(line);
449 download_log.appendLine(line);
450 }
451 else {
452 System.out.println("Error!!");
453 System.exit(-1);
454 }
455 }
456
457 if(state == STOPPED) {
458 boolean terminatePerlScript = true;
459
460 // When GLI is working with wget-based download modes other than OAI (MediaWiki and Web
461 // download) and the STOP button has been pressed, wget needs to be prematurely terminated.
462 // Only wget download modes Web and MediaWiki require the use of sockets to communicate
463 // with the perl script in order to get wget to terminate. Other download modes, including
464 // wgetdownload mode OAI, can terminate in the traditional manner: close process inputstream
465 // and kill perl process. OAI launches many wgets. So that when the perl process is terminated,
466 // the currently running wget will finish off but other wgets are no longer launched.
467 if(prcs != null && (mode.equals("Web") || mode.equals("MediaWiki"))) {
468
469 // create a socket to the perl child process and communicate the STOP message
470 Socket clientSocket = null;
471 if(clientSocket == null) {
472 try {
473 clientSocket = new Socket("localhost", this.port); // connect to the port chosen for this DownloadJob instance
474
475 BufferedReader clientReader = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
476 String response = clientReader.readLine(); // see if we've been connected
477 System.err.println("Communicating with perl download script on port " + this.port
478 + "\nGot response from perl: " + response);
479
480 // Send the STOP signal
481 OutputStream os = clientSocket.getOutputStream();
482 String message = "<<STOP>>\n";
483 os.write(message.getBytes());
484 response = clientReader.readLine(); // see whether the stop signal has been received
485 System.err.println("GLI sent STOP signal to perl to terminate wget."
486 + "\nGot response from perl: " + response);
487
488 response = clientReader.readLine(); // see whether the perl script is ready to be terminated
489 System.err.println("Got another response from perl: " + response);
490 os.close();
491
492 clientReader.close();
493 clientSocket.close(); // close the clientSocket (the Perl end will close the server socket that Perl opened)
494 clientReader = null;
495 clientSocket = null;
496
497 if(response == null) {
498 terminatePerlScript = false;
499 }
500 } catch(IOException ex) {
501 System.err.println("Tried to communicate through client socket - port " + this.port + ", but got exception: " + ex);
502 } catch(Exception ex) {
503 System.err.println("Tried to open client socket, but got exception: " + ex);
504 }
505 }
506 }
507
508 //prcs.getInputStream().close();
509 prcs.getErrorStream().close();
510 br.close();
511 br = null;
512 if(terminatePerlScript) {
513 prcs.destroy(); // This doesn't always work, but it's worth a try
514 prcs = null;
515 }
516
517 // Notify the DownloadScrollPane which is waiting on this job to complete that we are ready
518 synchronized(this) {
519 this.notify();
520 }
521 }
522 }
523 catch (Exception ioe) {
524 //message(Utility.ERROR, ioe.toString());
525 //JTest
526 DebugStream.printStackTrace(ioe);
527 }
528 // If we've got to here and the state isn't STOPPED then the
529 // job is complete.
530 if(state == DownloadJob.RUNNING) {
531 progress.mirrorComplete();
532 previous_state = state;
533 state = DownloadJob.COMPLETE;
534 }
535 // refresh the workspace tree
536 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
537 }
538
539
540 /** Called by the WGet native code when the current download is
541 * completed. In turn all download listeners are informed.
542 */
543 public void downloadComplete() {
544 progress.downloadComplete();
545 }
546
547
548 public void downloadComplete(String current_file_downloading)
549 {
550 progress.downloadComplete();
551 DebugStream.println("Download complete: " + current_file_downloading);
552 }
553
554
555 /** Called by the WGet native code when the requested download returns
556 * a status code other than 200.
557 */
558 public void downloadFailed() {
559 // TODO!!
560 //failed_urls.add(current_url); // It is the current url that failed
561 progress.downloadFailed();
562 //DebugStream.println("Download failed: " + current_url);
563 }
564
565 /**
566 */
567 public void downloadWarning() {
568 progress.downloadWarning();
569 }
570
571 public AppendLineOnlyFileDocument getLogDocument() {
572 return download_log;
573 }
574
575 /**
576 * @return Returns the progress bar associated with this job.
577 */
578 public DownloadProgressBar getProgressBar() {
579 return progress;
580 }
581
582 /** Called to discover if the user wanted this thread to run or if
583 * it is paused.
584 * @return An int representing the current DownloadJob state.
585 */
586 public int getState() {
587 return state;
588 }
589
590 /** Returns the current state of the stop flag for this job.
591 * @return A boolean representing whether the user has requested to
592 * stop.
593 */
594 public boolean hasSignalledStop() {
595 if(state == DownloadJob.STOPPED || state == DownloadJob.PAUSED ||
596 state == DownloadJob.COMPLETE) {
597 return true;
598 }
599 return false;
600 }
601
602 public void setState(int state) {
603 previous_state = this.state;
604 this.state = state;
605 }
606
607 /** A convenience call.
608 * @return A String representing the url of the initial url (root node of the mirrored tree).
609 */
610 public String toString() {
611 return download_url;
612 }
613
614 /** Called by the WGet native code to signal the current progress of
615 * downloading.
616 * @param current A long representing the number of bytes that have
617 * been downloaded since the last update.
618 * @param expected A long representing the total number of bytes
619 * expected for this download.
620 */
621 public void updateProgress(long current, long expected) {
622 progress.updateProgress(current, expected);
623 }
624
625
626 // Inner thread class that reads from process downloadfrom.pl's errorstream
627 private class PerlReaderThread extends Thread {
628 Process prcs = null;
629
630 public PerlReaderThread(Process proc) {
631 this.prcs = proc;
632 }
633
634 public void run() {
635 try {
636 if(prcs != null) {
637 String message = null;
638 BufferedReader eReader = new BufferedReader(new InputStreamReader(prcs.getInputStream()));
639 while(prcs != null && (message = eReader.readLine()) != null) {
640 if(!message.equals("\n")) {
641 System.err.println("**** Perl STDOUT: " + message);
642 }
643 }
644
645 if(prcs != null && eReader != null) {
646 eReader.close();
647 eReader = null;
648 System.err.println("**** Perl ENDed.");
649 }
650 }
651 } catch(Exception e) {
652 System.err.println("Thread - caught exception: " + e);
653 }
654 }
655 }
656}
Note: See TracBrowser for help on using the repository browser.