source: main/trunk/gli/src/org/greenstone/gatherer/download/DownloadJob.java@ 31720

Last change on this file since 31720 was 31720, checked in by ak19, 7 years ago
  1. GLI's DownloadJobs don't allow pausing and resuming (the button used to work, but didn't ever pause/resume in the background, at least it's been stopping the wget download activity after wget related changes from some years back). Changing the Pause/Resume button in the DownloadProgressBar to the Stop/Stopped button. 2. Added another useful link on InterruptedException to SafeProgress and its documentation.
  • Property svn:keywords set to Author Date Id Revision
File size: 48.2 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.download;
38
39import java.awt.event.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.tree.*;
44import javax.swing.SwingUtilities;
45import org.greenstone.gatherer.Configuration;
46import org.greenstone.gatherer.DebugStream;
47import org.greenstone.gatherer.Dictionary;
48import org.greenstone.gatherer.Gatherer;
49import org.greenstone.gatherer.GAuthenticator;
50import org.greenstone.gatherer.greenstone.LocalGreenstone;
51import org.greenstone.gatherer.file.WorkspaceTree;
52import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
53import org.greenstone.gatherer.util.SafeProcess;
54import org.greenstone.gatherer.util.Utility;
55import org.greenstone.gatherer.cdm.Argument;
56import org.greenstone.gatherer.collection.*;
57
58/**
59 * @author John Thompson, Greenstone Digital Library, University of Waikato
60 * @version 2.0
61 * When modifying this class, bear in mind concurrency issues that could arise with
62 * SafeProcess's worker threads and where synchronization may be needed to prevent such issues.
63 */
64public class DownloadJob
65 implements ActionListener, SafeProcess.MainProcessHandler {
66
67 private AppendLineOnlyFileDocument download_log;
68
69 private DownloadProgressBar progress;
70
71 private int previous_state;
72 private int state;
73
74 private SafeProcess prcs = null;
75
76 private final String download_url;
77 private boolean wasClosed = false;
78
79 // private String current_url;
80 // private String destination;
81 private final String proxy_pass;
82 private final String proxy_user;
83
84 //private final Vector encountered_urls;
85 //private Vector failed_urls;
86 private Download download;
87 private DownloadScrollPane mummy;
88 private HashMap download_option;
89
90 public static final int COMPLETE = 0;
91 public static final int PAUSED = 1;
92 public static final int RUNNING = 2;
93 public static final int STOPPED = 3;
94
95 public static final int UNKNOWN_MAX = 0;
96 public static final int DEFINED_MAX = 1;
97 public static final int UNDEFINED_MAX = 2;
98
99 // To prematurely terminate wget, we will need to use sockets and find a free port.
100 // We will look at a limited range of ports. This range will be reused (circular buffer)
101 private static final int PORT_BASE = 50000;
102 private static final int PORT_BLOCK_SIZE = 100;
103 private static int nextFreePort = PORT_BASE; // Keep track what port numbers we have checked for availability
104 int port; // package access. The socket port number this instance of DownloadJob will use
105 // only the main thread (where DownloadJob runs) modifies port, so no synching needed
106
107 private final String mode;
108
109 private String proxy_url; // only the main thread (where DownloadJob runs) modifies this, so no synching needed
110
111 /**
112 */
113 public DownloadJob(Download download, String proxy_pass, String proxy_user, DownloadScrollPane mummy, String mode, String proxy_url) {
114 URL url = null;
115 int folder_hash;
116
117 this.proxy_url = proxy_url;
118
119 download_option = downloadToHashMap(download);
120 if (!mode.equals("Z3950") && !mode.equals("SRW")) {
121 Argument url_arg = (Argument)download_option.get((String)"url");
122 download_url = url_arg.getValue();
123
124 }
125 else {
126 Argument host_arg = (Argument)download_option.get((String)"host");
127 Argument port_arg = (Argument)download_option.get((String)"port");
128 download_url = host_arg.getValue() + ":" +port_arg.getValue();
129 }
130
131 folder_hash = download_url.hashCode();
132 String log_filename = Utility.getLogDir(null) + "download-"+ mode + folder_hash + ".log";
133 File log_file = new File(log_filename);
134 if(log_file.exists()) {
135 log_file.delete();
136 }
137
138 File parent_log_file = log_file.getParentFile();
139 parent_log_file.mkdirs();
140 parent_log_file = null;
141 log_file = null;
142
143 this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
144
145 this.proxy_pass = proxy_pass;
146 this.proxy_user = proxy_user;
147 this.mummy = mummy;
148 this.mode = mode;
149 this.download = download;
150
151 progress = new DownloadProgressBar(this,download_url, true);
152 //encountered_urls = new Vector();
153 //failed_urls = new Vector();
154
155 previous_state = STOPPED;
156 state = STOPPED;
157 }
158
159 private HashMap downloadToHashMap(Download download)
160 {
161 HashMap download_option = new HashMap();
162 ArrayList arguments = download.getArguments(true, false);
163 for(int i = 0; i < arguments.size(); i++) {
164 Argument argument = (Argument) arguments.get(i);
165 download_option.put(argument.getName(), argument);
166 }
167 return download_option;
168 }
169
170 /** Depending on which button on the progress bar was pushed,
171 * this method will affect the state of the DownloadJob and perhaps make
172 * calls to wget.class if necessary.
173 * @param event The ActionEvent fired from within the DownloadProgressBar
174 * which we must respond to.
175 */
176 public void old_actionPerformed(ActionEvent event) {
177 // The stop_start_button is used to alternately start or stop the
178 // job. If the current state of the job is paused then this
179 // restart is logically equivalent to a resume.
180 if(event.getSource() == progress.stop_start_button) {
181 previous_state = state;
182 if (state == RUNNING) {
183 state = STOPPED;
184 } else {
185 //previous_state = state;
186 state = RUNNING;
187 mummy.resumeThread();
188 }
189 }
190 else if (event.getSource() == progress.close_button) {
191 if(state == RUNNING) {
192 previous_state = state;
193 state = STOPPED; // do we need to do anything else to stop this?
194 }
195 mummy.deleteDownloadJob(this);
196 }
197 }
198
199 /** Depending on which button on the progress bar was pushed,
200 * this method will affect the state of the DownloadJob and perhaps make
201 * calls to wget.class if necessary.
202 * @param event The ActionEvent fired from within the DownloadProgressBar
203 * which we must respond to.
204 * Now using synchronized methods like previous_state = getState(); instead of
205 * previous_state = state; and setState(STOPPED); instead of state = STOPPED;
206 */
207 public void actionPerformed(ActionEvent event) {
208 // The stop_start_button is used to alternately start or stop the
209 // job. If the current state of the job is paused then this
210 // restart is logically equivalent to a resume.
211 if(event.getSource() == progress.stop_start_button) {
212 previous_state = getState();
213 if (getState() == RUNNING) {
214 //setState(STOPPED);
215 stopDownload(); // cancels any running SafeProcess
216 } else {
217 //previous_state = getState();
218 setState(RUNNING);
219 mummy.resumeThread();
220 }
221 }
222 else if (event.getSource() == progress.close_button) {
223 setClosed();
224 SafeProcess.log("@@@ Progress bar close button pressed");
225 if(getState() == RUNNING) {
226 previous_state = getState();
227 //setState(STOPPED); // do we need to do anything else to stop this? YES, we do:
228 stopDownload(); // cancels any running SafeProcess
229 }
230 mummy.deleteDownloadJob(this);
231 }
232 }
233
234 /** Given a portnumber to check, returns true if it is available
235 * (if nothing's listening there already). */
236 public static boolean isPortAvailable(int portnum) {
237 Socket tmpSocket = null;
238 try {
239 tmpSocket = new Socket("localhost", portnum);
240 tmpSocket.close();
241 return false;
242
243 } catch(ConnectException ex){
244 // "Signals that an error occurred while attempting to connect a socket
245 // to a remote address and port. Typically, the connection was refused
246 // remotely (e.g., no process is listening on the remote address/port)."
247 System.err.println("Port " + portnum + " not yet in use.");
248 tmpSocket = null;
249 return true;
250
251 } catch(Exception ex) {
252 // includes BindException "Signals that an error occurred while attempting
253 // to bind a socket to a local address and port. Typically, the port is in
254 // use, or the requested local address could not be assigned."
255 tmpSocket = null;
256 return false;
257 }
258 }
259
260 /** Circular buffer. Modifies the value of nextFreePort (the buffer index). */
261 private void incrementNextFreePort() {
262 int offset = nextFreePort - PORT_BASE;
263 offset = (offset + 1) % PORT_BLOCK_SIZE;
264 nextFreePort = PORT_BASE + offset;
265 }
266
267 // If eschewing the use of SafeProcess, reactivate (by renaming) old_callDownload()
268 // and old_actionPerformed(), and DownloadScrollPane.java's old_deleteDownloadJob().
269 public void old_callDownload() {
270
271 ArrayList command_list = new ArrayList();
272
273 // the following also works for client-gli if downloading is enabled (when there's a gs2build directory inside gli)
274 command_list.add(Configuration.perl_path);
275 command_list.add("-S");
276 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
277 command_list.add("-download_mode");
278 command_list.add(mode);
279 command_list.add("-cache_dir");
280 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
281 // For the purposes of prematurely terminating wget from GLI (which creates a socket
282 // as a communication channel between GLI and Perl), it is important to tell the script
283 // that we're running as GLI. Because when running from the command prompt, it should
284 // not create this socket and do the related processing.
285 command_list.add("-gli");
286
287 ArrayList all_arg = download.getArguments(true,false);
288 for(int i = 0; i < all_arg.size(); i++) {
289 Argument argument = (Argument) all_arg.get(i);
290 if(argument.isAssigned()) {
291 command_list.add("-" + argument.getName());
292 if(argument.getType() != Argument.FLAG) {
293 command_list.add(argument.getValue());
294 }
295 }
296 }
297
298 String [] cmd = (String []) command_list.toArray(new String[0]);
299 DebugStream.println("Download job, "+command_list);
300
301 if (previous_state == DownloadJob.COMPLETE) {
302 progress.mirrorBegun(true, true);
303 }
304 else {
305 progress.mirrorBegun(false, true);
306 }
307
308 try {
309 Runtime rt = Runtime.getRuntime();
310
311 String [] env = null;
312
313 Process prcs = null;
314
315
316 if (Utility.isWindows()) {
317 prcs = rt.exec(cmd);
318 }
319 else {
320 if (proxy_url != null && !proxy_url.equals("")) {
321 // Specify proxies as environment variables
322 // Need to manually specify GSDLHOME and GSDLOS also
323 env = new String[4];
324 proxy_url = proxy_url.replaceAll("http://","");
325 env[0] = "http_proxy=http://"+proxy_url;
326 env[1] = "ftp_proxy=ftp://"+proxy_url;
327 env[2] = "GSDLHOME=" + Configuration.gsdl_path;
328 env[3] = "GSDLOS=" + Gatherer.client_operating_system;
329 prcs = rt.exec(cmd, env);
330 }
331 else if(Gatherer.isGsdlRemote && Gatherer.isDownloadEnabled) {
332 // Not Windows, but running client with download panel
333 // Need to manually specify GSDLHOME and GSDLOS
334 env = new String[2];
335 env[0] = "GSDLHOME=" + Configuration.gsdl_path;
336 env[1] = "GSDLOS=" + Gatherer.client_operating_system;
337 prcs = rt.exec(cmd, env);
338 }
339 else {
340 // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set
341 prcs = rt.exec(cmd);
342 }
343 }
344 //System.out.println(newcmd);
345
346 // Can use the following if debugging WgetDownload.pm - Reads debug stmts from the perl process' STDIN stream
347 //(new PerlReaderThread(prcs)).start();
348
349 InputStream is = prcs.getInputStream();
350 BufferedReader reader = new BufferedReader(new InputStreamReader(is));
351
352 // To be able to stop Wget, we use sockets to communicate with the perl process that launched wget
353 if (mode.equals("Web") || mode.equals("MediaWiki")) { // wget download modes other than OAI
354
355 // Need to find an available (unused) port within the range we're looking for to pass it
356 // the Perl child process, so that it may set up a listening ServerSocket at that port number
357 try {
358 boolean foundFreePort = false;
359 for(int i = 0; i < PORT_BLOCK_SIZE; i++) {
360
361 if(isPortAvailable(nextFreePort)) {
362 foundFreePort = true;
363 break;
364
365 } else {
366 incrementNextFreePort();
367 }
368 }
369
370 if(foundFreePort) {
371 // Free port number currently found becomes the port number of the socket that this
372 // DownloadJob instance will be connecting to when the user wants to prematurely stop Wget.
373 this.port = nextFreePort;
374 incrementNextFreePort();
375
376 } else {
377 throw new Exception("Cannot find an available port in the range "
378 + PORT_BASE + "-" + (PORT_BASE+PORT_BLOCK_SIZE)
379 + "\nwhich is necessary for forcibly terminating wget.");
380 }
381
382 // Communicate the chosen port for this DownloadJob instance to the perl process, so
383 // that it can set up a ServerSocket at that port to listen for any signal to terminate wget
384 OutputStream os = prcs.getOutputStream();
385 String p = ""+this.port+"\n";
386 System.err.println("Portnumber found: " + p);
387
388 os.write(p.getBytes());
389 os.close();
390
391 } catch(Exception ex) {
392 System.err.println("Sent available portnumber " + this.port + " to process' outputstream.\nBut got exception: " + ex);
393 }
394 }
395
396 BufferedReader br = new BufferedReader(new InputStreamReader(prcs.getErrorStream()));
397 // Capture the standard error stream and search for two particular occurrences.
398 String line="";
399 boolean ignore_for_robots = false;
400 int max_download = DownloadJob.UNKNOWN_MAX;
401
402 while ((line = br.readLine()) != null && !line.trim().equals("<<Finished>>") && state != STOPPED) {
403 if ( max_download == DownloadJob.UNKNOWN_MAX) {
404 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
405 max_download = DownloadJob.DEFINED_MAX;
406 }
407 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
408 max_download = DownloadJob.UNDEFINED_MAX;
409 }
410 }
411 else if(max_download == DownloadJob.UNDEFINED_MAX) {
412 DebugStream.println(line);
413 download_log.appendLine(line);
414 // The first magic special test is to see if we've just
415 // asked for the robots.txt file. If so we ignore
416 // the next add and then the next complete/error.
417 if(line.lastIndexOf("robots.txt;") != -1) {
418 DebugStream.println("***** Requesting robot.txt");
419 ignore_for_robots = true;
420 }
421 // If line contains "=> `" display text as the
422 // currently downloading url. Unique to add download.
423 else if(line.lastIndexOf("=> `") != -1) {
424 if(!ignore_for_robots) {
425 // Add download
426 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
427 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
428 }
429 }
430 // If line contains "/s) - `" set currently
431 // downloading url to "Download Complete".
432 else if(line.lastIndexOf("/s) - `") != -1) {
433 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
434 if(!ignore_for_robots) {
435 DebugStream.println("Not ignore for robots");
436 // Download complete
437 downloadComplete(current_file_downloading);
438 }
439 else {
440 DebugStream.println("Ignore for robots");
441 ignore_for_robots = false;
442 }
443 }
444 // The already there line begins "File `..." However this
445 // is only true in english, so instead I looked and there
446 // are few (if any at all) other messages than those above
447 // and not overwriting messages that use " `" so we'll
448 // look for that. Note this method is not guarenteed to be
449 // unique like the previous two.
450 else if(line.lastIndexOf(" `") != -1) {
451 // Not Overwriting
452 DebugStream.println("Already there.");
453 String new_url =
454 line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
455 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
456 downloadWarning();
457 }
458 // Any other important message starts with the time in the form hh:mm:ss
459 else if(line.length() > 7) {
460 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
461 if(!ignore_for_robots) {
462 DebugStream.println("Error.");
463 downloadFailed();
464 }
465 else {
466 ignore_for_robots = false;
467 }
468 }
469 }
470 }
471 else if (max_download == DownloadJob.DEFINED_MAX) {
472 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
473 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
474 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
475 progress.resetFileCount();
476 progress.addDownload("files"); // for display: "Downloading files"
477 }
478 else if (line.lastIndexOf("<<Done>>") != -1) {
479 progress.increaseFileCount();
480 }
481 else if(line.lastIndexOf("<<Done:") != -1) {
482 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
483 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
484 }
485
486 DebugStream.println(line);
487 download_log.appendLine(line);
488 }
489 else {
490 System.out.println("Error!!");
491 System.exit(-1);
492 }
493 }
494
495 if(state == STOPPED) {
496 boolean terminatePerlScript = true;
497
498 // When GLI is working with wget-based download modes other than OAI (MediaWiki and Web
499 // download) and the STOP button has been pressed, wget needs to be prematurely terminated.
500 // Only wget download modes Web and MediaWiki require the use of sockets to communicate
501 // with the perl script in order to get wget to terminate. Other download modes, including
502 // wgetdownload mode OAI, can terminate in the traditional manner: close process inputstream
503 // and kill perl process. OAI launches many wgets. So that when the perl process is terminated,
504 // the currently running wget will finish off but other wgets are no longer launched.
505 if(prcs != null && (mode.equals("Web") || mode.equals("MediaWiki"))) {
506
507 // create a socket to the perl child process and communicate the STOP message
508 Socket clientSocket = null;
509 if(clientSocket == null) {
510 try {
511 clientSocket = new Socket("localhost", this.port); // connect to the port chosen for this DownloadJob instance
512
513 BufferedReader clientReader = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
514 String response = clientReader.readLine(); // see if we've been connected
515 System.err.println("Communicating with perl download script on port " + this.port
516 + "\nGot response from perl: " + response);
517
518 // Send the STOP signal
519 OutputStream os = clientSocket.getOutputStream();
520 String message = "<<STOP>>\n";
521 os.write(message.getBytes());
522 response = clientReader.readLine(); // see whether the stop signal has been received
523 System.err.println("GLI sent STOP signal to perl to terminate wget."
524 + "\nGot response from perl: " + response);
525
526 response = clientReader.readLine(); // see whether the perl script is ready to be terminated
527 System.err.println("Got another response from perl: " + response);
528 os.close();
529
530 clientReader.close();
531 clientSocket.close(); // close the clientSocket (the Perl end will close the server socket that Perl opened)
532 clientReader = null;
533 clientSocket = null;
534
535 if(response == null) {
536 terminatePerlScript = false;
537 }
538 } catch(IOException ex) {
539 System.err.println("Tried to communicate through client socket - port " + this.port + ", but got exception: " + ex);
540 } catch(Exception ex) {
541 System.err.println("Tried to open client socket, but got exception: " + ex);
542 }
543 }
544 }
545
546 //prcs.getInputStream().close();
547 prcs.getErrorStream().close();
548 br.close();
549 br = null;
550 if(terminatePerlScript) {
551 prcs.destroy(); // This doesn't always work, but it's worth a try
552 prcs = null;
553 }
554
555 // Notify the DownloadScrollPane which is waiting on this job to complete that we are ready
556 synchronized(this) {
557 this.notify();
558 }
559 }
560 }
561 catch (Exception ioe) {
562 //message(Utility.ERROR, ioe.toString());
563 //JTest
564 DebugStream.printStackTrace(ioe);
565 }
566 // If we've got to here and the state isn't STOPPED then the
567 // job is complete.
568 if(state == DownloadJob.RUNNING) {
569 progress.mirrorComplete();
570 previous_state = state;
571 state = DownloadJob.COMPLETE;
572 }
573 // refresh the workspace tree
574 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
575 }
576
577 public void callDownload() {
578
579 ArrayList command_list= new ArrayList();
580
581 // the following also works for client-gli if downloading is enabled (when there's a gs2build directory inside gli)
582 command_list.add(Configuration.perl_path);
583 command_list.add("-S");
584 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
585 command_list.add("-download_mode");
586 command_list.add(mode);
587 command_list.add("-cache_dir");
588 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
589 // For the purposes of prematurely terminating wget from GLI (which creates a socket
590 // as a communication channel between GLI and Perl), it is important to tell the script
591 // that we're running as GLI. Because when running from the command prompt, it should
592 // not create this socket and do the related processing.
593 command_list.add("-gli");
594
595 ArrayList all_arg = download.getArguments(true,false);
596 for(int i = 0; i < all_arg.size(); i++) {
597 Argument argument = (Argument) all_arg.get(i);
598 if(argument.isAssigned()) {
599 command_list.add("-" + argument.getName());
600 if(argument.getType() != Argument.FLAG) {
601 command_list.add(argument.getValue());
602 }
603 }
604 }
605
606 String [] cmd = (String []) command_list.toArray(new String[0]);
607 DebugStream.println("Download job, "+command_list);
608
609 if (previous_state == DownloadJob.COMPLETE) {
610 progress.mirrorBegun(true, true);
611 }
612 else {
613 progress.mirrorBegun(false, true);
614 }
615
616 try {
617 Runtime rt = Runtime.getRuntime();
618
619 String [] env = null;
620
621 if (Utility.isWindows()) {
622 prcs = new SafeProcess(cmd);
623 }
624 else {
625 if (proxy_url != null && !proxy_url.equals("")) {
626 // Specify proxies as environment variables
627 // Need to manually specify GSDLHOME and GSDLOS also
628 env = new String[4];
629 proxy_url = proxy_url.replaceAll("http://","");
630 env[0] = "http_proxy=http://"+proxy_url;
631 env[1] = "ftp_proxy=ftp://"+proxy_url;
632 env[2] = "GSDLHOME=" + Configuration.gsdl_path;
633 env[3] = "GSDLOS=" + Gatherer.client_operating_system;
634
635 prcs = new SafeProcess(cmd, env, null);
636 }
637 else if(Gatherer.isGsdlRemote && Gatherer.isDownloadEnabled) {
638 // Not Windows, but running client with download panel
639 // Need to manually specify GSDLHOME and GSDLOS
640 env = new String[2];
641 env[0] = "GSDLHOME=" + Configuration.gsdl_path;
642 env[1] = "GSDLOS=" + Gatherer.client_operating_system;
643
644 prcs = new SafeProcess(cmd, env, null);
645 }
646 else {
647 // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set
648 prcs = new SafeProcess(cmd);
649 }
650 }
651 //System.out.println(newcmd);
652 prcs.setMainHandler(this); // attach handler to clean up before and after process.destroy()
653 // for which DownloadJob implements SafeProcess.MainProcessHandler
654
655 // To be able to stop Wget, we use sockets to communicate with the perl process that launched wget
656 if (mode.equals("Web") || mode.equals("MediaWiki")) { // wget download modes other than OAI
657
658 // Need to find an available (unused) port within the range we're looking for to pass it
659 // the Perl child process, so that it may set up a listening ServerSocket at that port number
660 try {
661 boolean foundFreePort = false;
662 for(int i = 0; i < PORT_BLOCK_SIZE; i++) {
663
664 if(isPortAvailable(nextFreePort)) {
665 foundFreePort = true;
666 break;
667
668 } else {
669 incrementNextFreePort();
670 }
671 }
672
673 if(foundFreePort) {
674 // Free port number currently found becomes the port number of the socket that this
675 // DownloadJob instance will be connecting to when the user wants to prematurely stop Wget.
676 this.port = nextFreePort;
677 incrementNextFreePort(); //// Necessary?
678
679 } else {
680 throw new Exception("Cannot find an available port in the range "
681 + PORT_BASE + "-" + (PORT_BASE+PORT_BLOCK_SIZE)
682 + "\nwhich is necessary for forcibly terminating wget.");
683 }
684
685 // Communicate the chosen port for this DownloadJob instance to the perl process, so
686 // that it can set up a ServerSocket at that port to listen for any signal to terminate wget
687 //OutputStream os = prcs.getOutputStream();
688 String p = ""+this.port+"\n";
689 System.err.println("Portnumber found: " + p);
690
691 prcs.setInputString(p);
692
693 } catch(Exception ex) {
694 System.err.println("Sent available portnumber " + this.port + " to process' outputstream.\nBut got exception: " + ex);
695 }
696 }
697
698 ProcessErrHandler errHandler = new ProcessErrHandler(); // meaningful output comes from prcs stderr
699 ProcessOutHandler outHandler = new ProcessOutHandler(); // debugging output comes from prcs' stdout
700
701 int exitVal = prcs.runProcess(null, outHandler, errHandler);
702
703 // if prcs is interrupted (cancelled) during the blocking runProcess() call,
704 // as happens on state == STOPPED, then
705 // beforeWaitingForStreamsToEnd() is called before the process' worker threads come to a halt
706 // and afterStreamsEnded() is called when the process' worker threads have halted,
707 // beforeProcessDestroy() is called before the process is destroyed,
708 // and afterProcessDestroy() is called after the proc has been destroyed.
709 // If when beforeWaitingForStreamsEnd() stage the perl was still running but had been
710 // told to stop, then the beforeWaitingForStreamsEnd() method will make sure to communicate
711 // with the perl process over a socket and send it the termination message,
712 // which will also kill any runnning wget that perl launched.
713 // In that case, destroy() is actually called on the process at last.
714
715 }
716 catch (Exception ioe) {
717 SafeProcess.log(ioe);
718 DebugStream.printStackTrace(ioe);
719 }
720
721 // now the process is done, we can at last null it
722 prcs = null;
723
724 // If we've got to here and the state isn't STOPPED then the
725 // job is complete.
726 if(getState() == DownloadJob.RUNNING) {
727 progress.mirrorComplete();
728 previous_state = getState();
729 setState(DownloadJob.COMPLETE);
730 }
731
732 SafeProcess.log("@@@@ DONE callDownload()");
733
734 /*
735 // Regardless of whether state==STOPPED or ends up being COMPLETE, the process is at an end now.
736 // Notify the DownloadScrollPane which is waiting on this job to complete that we are ready
737 synchronized(this) {
738 System.err.println("**************** Notifying download scrollpane");
739 this.notify();
740 }
741 */
742
743 // refresh the workspace tree
744 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
745 }
746
747 private synchronized boolean isStopped() { return state == STOPPED; }
748
749 // called when the user cancelled the download and we're told to stop both our external perl process
750 // and the wget process that it in turn launched
751 public void stopDownload() {
752 if(prcs != null) {
753 SafeProcess.log("@@@ Going to interrupt the SafeProcess...");
754
755 // Whether a process ends naturally or is prematurely ended, beforeWaitingForStreamsToEnd()
756 // will be called. We've hooked this in to calling tellPerlToTerminateWget() only if the
757 // process is still running when cancel is pressed, but not when it's naturally terminated.
758 boolean hadToSendInterrupt = prcs.cancelRunningProcess(); // returns false if it was already terminating/terminated, true if interrupt sent
759
760
761 /*
762 // if process terminating naturally but waiting for process' worker threads to join(),
763 // shall we just remove the progress bar display for this download?
764 // If so, do this section in place of the 2 calls to progress.enableCancelJob(boolean) below
765 if(!hadToSendInterrupt && SwingUtilities.isEventDispatchThread()) {
766 if(getState() == DownloadJob.RUNNING) {
767 progress.mirrorComplete();
768 previous_state = getState();
769 setState(DownloadJob.COMPLETE);
770 }
771 mummy.deleteCurrentDownloadJob(this); // why wait for the cleanup which can't be interrupted anyway?
772 }
773 */
774 } else {
775 System.err.println("@@@@ No process to interrupt");
776 }
777
778 //setState(STOPPED); // would set it to stop on cancel, even if it already naturally terminated
779
780 }
781
782//*********** START of implementing interface Safeprocess.MainProcessHandler
783 // before and after processDestroy only happen when interrupted AND terminatePerlScript=true
784 public void beforeProcessDestroy() {}
785 public void afterProcessDestroy() {}
786
787 // after blocking call on closing up streamgobbler worker threads that happens
788 // upon natural termination or interruption of process' main body/thread.
789 // if not overriding, then return the parameter forciblyTerminating as-is
790 public boolean afterStreamsEnded(boolean forciblyTerminating) { return forciblyTerminating; }
791
792 // called after the SafeProcess has fully terminated (naturally or via process.destroy())
793 // and has been cleaned up
794 public void doneCleanup(boolean wasForciblyTerminated) {
795 // let the user know they can cancel again now cleanup phase is done
796 progress.enableCancelJob(true);
797
798 if(wasForciblyTerminated) {
799 setState(STOPPED); // sets it to stop only if process truly was prematurely terminated, not merely
800 // if the cancel button was clicked when it had already naturally terminated
801
802 // If the user had pressed the Close button to terminate the running job, then
803 // we're now ready to remove the display of the until now running job
804 // from the download progress bar interface
805 // But don't bother removing the progress bar if the user had only pressed the Stop button
806 if(wasClosed()) {
807 mummy.deleteCurrentDownloadJob(this);
808 }
809 } /*else {
810 // If we've got to here and the state isn't STOPPED then the
811 // job is complete.
812 System.err.println("**************** NOT Notifying download scrollpane");
813 if(getState() == DownloadJob.RUNNING) {
814 progress.mirrorComplete();
815 previous_state = getState();
816 setState(DownloadJob.COMPLETE);
817 }
818 }
819
820 // Regardless of whether state==STOPPED or ends up being COMPLETE, the process is at an end now.
821 // Notify the DownloadScrollPane which is waiting on this job to complete that we are ready
822 synchronized(this) {
823 System.err.println("**************** Notifying download scrollpane");
824 this.notify();
825 }*/
826
827 }
828
829 // before blocking call of ending streamgobbler worker threads that happens
830 // after process' main body/thread has naturally terminated or been interrupted
831 public boolean beforeWaitingForStreamsToEnd(boolean forciblyTerminating) {
832 // let the user know they can't cancel during cleanup phase
833 progress.enableCancelJob(false);
834
835 SafeProcess.log("**** in beforeWaitingForStreamsToEnd()");
836
837 // state would not be STOPPED if cancel was pressed after the process naturally terminated anyway
838 // in that case we don't need to send perl the signal to terminate WGET
839 if(!forciblyTerminating) { //if(!isStopped()) {
840 SafeProcess.log("*** Process not (yet) cancelled/state not (yet) stopped");
841 SafeProcess.log("*** But process has naturally terminated (process streams are being closed before any interruption signal can be received), so won't be destroying process even on interrupt");
842 return false; // for us to be in this method at all with forciblyTerminating being false
843 // means the process is already naturally terminating, so don't unnaturally destroy it
844 }
845
846 // else the process is still running and we've been told to stop, so tell perl to stop wget first
847 // (so that process destroy can then be called thereafter)
848 return tellPerlToTerminateWget();
849 }
//*********** END of implementing interface SafeProcess.MainProcessHandler
851
852 public boolean tellPerlToTerminateWget() {
853 SafeProcess.log("**** in tellPerlToTerminateWget()");
854
855 boolean terminatePerlScript = true;
856
857 // When GLI is working with wget-based download modes other than OAI (MediaWiki and Web
858 // download) and the STOP button has been pressed, wget needs to be prematurely terminated.
859 // Only wget download modes Web and MediaWiki require the use of sockets to communicate
860 // with the perl script in order to get wget to terminate. Other download modes, including
861 // wgetdownload mode OAI, can terminate in the traditional manner: close process inputstream
862 // and kill perl process. OAI launches many wgets. So that when the perl process is terminated,
863 // the currently running wget will finish off but other wgets are no longer launched.
864 if((mode.equals("Web") || mode.equals("MediaWiki"))) {
865 SafeProcess.log("@@@ Socket communication to end wget");
866 // create a socket to the perl child process and communicate the STOP message
867 Socket clientSocket = null;
868 BufferedReader clientReader = null;
869 OutputStream os = null;
870
871 if(clientSocket == null) {
872 try {
873 clientSocket = new Socket("localhost", this.port); // connect to the port chosen for this DownloadJob instance
874
875 clientReader = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
876 String response = clientReader.readLine(); // see if we've been connected
877 System.err.println("Communicating with perl download script on port " + this.port
878 + "\nGot response from perl: " + response);
879
880 // Send the STOP signal
881 os = clientSocket.getOutputStream();
882 String message = "<<STOP>>\n";
883 os.write(message.getBytes());
884 response = clientReader.readLine(); // see whether the stop signal has been received
885 System.err.println("GLI sent STOP signal to perl to terminate wget."
886 + "\nGot response from perl: " + response);
887
888 response = clientReader.readLine(); // see whether the perl script is ready to be terminated
889 System.err.println("Got another response from perl: " + response);
890
891 if(response == null) { // why? Is it because the process has already terminated naturally if response is null?
892 terminatePerlScript = false;
893 }
894 } catch(IOException ex) {
895 if(ex instanceof IOException && ex.getMessage().indexOf("Connection refused") != -1) {
896 terminatePerlScript = false; // no socket listening on other end because process ended
897 System.err.println("Tried to communicate through client socket - port " + this.port + ", but the process seems to have already ended naturally");
898 } else {
899 System.err.println("Tried to communicate through client socket - port " + this.port + ", but got exception: " + ex);
900 }
901
902 } catch(Exception ex) {
903 System.err.println("Tried to open client socket, but got exception: " + ex);
904 } finally {
905 SafeProcess.closeResource(os);
906 SafeProcess.closeResource(clientReader);
907 SafeProcess.closeSocket(clientSocket); // close the clientSocket (the Perl end will close the server socket that Perl opened)
908 os = null;
909 clientReader = null;
910 clientSocket = null;
911 }
912 }
913 }
914
915 return terminatePerlScript; // if true, it will call destroy() on the SafeProcess' process
916 }
917
918
919 /** Called by the WGet native code when the current download is
920 * completed. In turn all download listeners are informed.
921 */
922 public void downloadComplete() {
923 progress.downloadComplete(); // now this is synchronized
924 }
925
926
927 public void downloadComplete(String current_file_downloading)
928 {
929 progress.downloadComplete(); // now this is synchronized
930 DebugStream.println("Download complete: " + current_file_downloading);
931 }
932
933
934 /** Called by the WGet native code when the requested download returns
935 * a status code other than 200.
936 */
937 public void downloadFailed() {
938 // TODO!!
939 //synchronized(failed_urls) {
940 //failed_urls.add(current_url); // It is the current url that failed
941 //}
942 progress.downloadFailed(); // now this is synchronized
943 //DebugStream.println("Download failed: " + current_url);
944 }
945
946 /**
947 */
948 public void downloadWarning() {
949 progress.downloadWarning(); // now this is synchronized
950 }
951
952 public AppendLineOnlyFileDocument getLogDocument() {
953 return download_log;
954 }
955
956 /**
957 * @return Returns the progress bar associated with this job.
958 */
959 public DownloadProgressBar getProgressBar() {
960 return progress;
961 }
962
963 /** Called to discover if the user wanted this thread to run or if
964 * it is paused.
965 * @return An int representing the current DownloadJob state.
966 */
967 public synchronized int getState() {
968 return state;
969 }
970
971 /** @return true if the close button of the DownloadProgressBar was pressed,
972 * false otherwise such as if the Stop button had been pressed.
973 */
974 private synchronized boolean wasClosed() {
975 return this.wasClosed;
976 }
977
978 /** Returns the current state of the stop flag for this job.
979 * @return A boolean representing whether the user has requested to
980 * stop.
981 */
982 public synchronized boolean hasSignalledStop() {
983 if(state == DownloadJob.STOPPED || state == DownloadJob.PAUSED ||
984 state == DownloadJob.COMPLETE) {
985 return true;
986 }
987 return false;
988 }
989
990 public synchronized void setState(int state) {
991 previous_state = this.state;
992 this.state = state;
993 }
994
995 private synchronized void setClosed() {
996 this.wasClosed = true;
997 }
998
999 /** A convenience call.
1000 * @return A String representing the url of the initial url (root node of the mirrored tree).
1001 */
1002 public String toString() {
1003 return download_url;
1004 }
1005
1006 /** Called by the WGet native code to signal the current progress of
1007 * downloading.
1008 * @param current A long representing the number of bytes that have
1009 * been downloaded since the last update.
1010 * @param expected A long representing the total number of bytes
1011 * expected for this download.
1012 */
1013 public void updateProgress(long current, long expected) {
1014 progress.updateProgress(current, expected);
1015 }
1016
1017
1018 /*
1019 Go through https://docs.oracle.com/javase/tutorial/essential/concurrency/atomicvars.html series of
1020 Java articles on concurrency again.
1021 Go through http://docs.oracle.com/javase/tutorial/uiswing/concurrency/
1022
1023 http://stackoverflow.com/questions/574240/is-there-an-advantage-to-use-a-synchronized-method-instead-of-a-synchronized-blo
1024
1025 "Not only do synchronized methods not lock the whole class, but they don't lock the whole instance either. Unsynchronized methods in the class may still proceed on the instance."
    "Only the synchronized methods are locked. If there are fields you use within synced methods that are accessed by unsynced methods, you can run into race conditions."
1027
1028 "synchronizing on "this" is considered in some circles to be an anti-pattern. The unintended consequence is that outside of the class someone can lock on an object reference that is equal to "this" and prevent other threads from passing the barriers within the class potentially creating a deadlock situation. Creating a "private final Object = new Object();" variable purely for locking purposes is the often used solution. Here's another question relating directly to this issue. http://stackoverflow.com/questions/442564/avoid-synchronizedthis-in-java?lq=1"
1029
1030 "A private lock is a defensive mechanism, which is never a bad idea.
1031
1032 Also, as you alluded to, private locks can control granularity. One set of operations on an object might be totally unrelated to another but synchronized(this) will mutually exclude access to all of them."
1033
1034 http://stackoverflow.com/questions/8393883/is-synchronized-keyword-exception-safe
1035 "In any scoped thread-safe block, the moment you get out of it, the thread-safety is gone."
1036 "In case of an exception the lock will be released."
1037
1038 http://stackoverflow.com/questions/8259479/should-i-synchronize-listener-notifications-or-not
1039 "Use a CopyOnWriteArrayList for your listener arrays."
1040 "If you use the CopyOnWriteArrayList, then you don't have to synchronize when iterating."
1041 "CopyOnWriteArrayList is thread-safe, so there is no need to synchronize."
1042
1043 "Use a ConcurrentLinkedQueue<Listener> ... for this kind of problems: adding, removing and iterating simultaneously on a collection.
1044 A precision : this solution prevents a listener from being called from the very moment it is deregistered."
1045 "It means that you start iterating, an element is added, it will be called, another is removed, it won't, all this in the same iteration cycle.
1046 It's the best of both world: ensuring synchronization, while being fine grained on who gets called and who's not."
1047
1048 http://stackoverflow.com/questions/8260205/when-a-listener-is-removed-is-it-okay-that-the-event-be-called-on-that-listener
1049
1050 http://stackoverflow.com/questions/2282166/java-synchronizing-on-primitives
1051
1052 1. You can't lock on a primitive and
1053 2. Don't lock on a Long unless you're careful how you construct them. Long values created by autoboxing or Long.valueOf() in a certain range are guaranteed to be the same across the JVM which means other threads could be locking on the same exact Long object and giving you cross-talk. This can be a subtle concurrency bug (similar to locking on intern'ed strings).
1054
1055 Cross-talk:
1056 "In electronics, crosstalk is any phenomenon by which a signal transmitted on one circuit or channel of a transmission system creates an undesired effect in another circuit or channel. Crosstalk is usually caused by undesired capacitive, inductive, or conductive coupling from one circuit, part of a circuit, or channel, to another."
1057 */
1058
1059
1060 // Inner thread class that reads from process downloadfrom.pl's std output stream
1061 private class ProcessOutHandler extends SafeProcess.CustomProcessHandler {
1062
1063 public ProcessOutHandler() {
1064 super(SafeProcess.STDOUT);
1065 }
1066
1067 public void run(Closeable stream) {
1068 InputStream is = (InputStream) stream;
1069 BufferedReader eReader = null;
1070 try {
1071
1072 String message = null;
1073 eReader = new BufferedReader(new InputStreamReader(is));
1074 while(!Thread.currentThread().isInterrupted() && (message = eReader.readLine()) != null) {
1075 if(!message.equals("\n")) {
1076 System.err.println("**** Perl STDOUT: " + message);
1077 }
1078 }
1079 if(Thread.currentThread().isInterrupted()) {
1080 System.err.println("**** Perl INTERRUPTed.");
1081 } else {
1082 System.err.println("**** Perl ENDed.");
1083 }
1084
1085 } catch(Exception e) {
1086 System.err.println("Thread - caught exception: " + e);
1087 } finally {
1088 if(Thread.currentThread().isInterrupted()) {
1089 SafeProcess.log("@@@ Successfully interrupted " + Thread.currentThread().getName() + ".");
1090 }
1091 SafeProcess.closeResource(eReader);
1092 eReader = null;
1093 }
1094 }
1095 }
1096
1097
1098 private class ProcessErrHandler extends SafeProcess.CustomProcessHandler {
1099
1100 public ProcessErrHandler() {
1101 super(SafeProcess.STDERR);
1102 }
1103
1104 public void run(Closeable stream) {
1105 InputStream eis = (InputStream) stream;
1106
1107 BufferedReader br = null;
1108 try {
1109 br = new BufferedReader(new InputStreamReader(eis));
1110
1111 // Capture the standard error stream and search for two particular occurrences.
1112 String line="";
1113 boolean ignore_for_robots = false;
1114 int max_download = DownloadJob.UNKNOWN_MAX;
1115
1116 // handle to outer class objects that need synchronization (on either objects or their methods)
1117 DownloadProgressBar progress = DownloadJob.this.progress;
1118 AppendLineOnlyFileDocument download_log = DownloadJob.this.download_log;
1119
1120 while (!Thread.currentThread().isInterrupted() && (line = br.readLine()) != null
1121 && !line.trim().equals("<<Finished>>") /*&& !isStopped()*/) {
1122 if (max_download == DownloadJob.UNKNOWN_MAX) {
1123 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
1124 max_download = DownloadJob.DEFINED_MAX;
1125 }
1126 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
1127 max_download = DownloadJob.UNDEFINED_MAX;
1128 }
1129 }
1130 else if(max_download == DownloadJob.UNDEFINED_MAX) {
1131 DebugStream.println(line);
1132 download_log.appendLine(line); // now synchronized
1133 // The first magic special test is to see if we've just
1134 // asked for the robots.txt file. If so we ignore
1135 // the next add and then the next complete/error.
1136 if(line.lastIndexOf("robots.txt;") != -1) {
1137 DebugStream.println("***** Requesting robot.txt");
1138 ignore_for_robots = true;
1139 }
1140 // If line contains "=> `" display text as the
1141 // currently downloading url. Unique to add download.
1142 else if(line.lastIndexOf("=> `") != -1) {
1143 if(!ignore_for_robots) {
1144 // Add download
1145 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
1146
1147 // now synchronized
1148 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
1149 }
1150 }
1151 // If line contains "/s) - `" set currently
1152 // downloading url to "Download Complete".
1153 else if(line.lastIndexOf("/s) - `") != -1) {
1154 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
1155 if(!ignore_for_robots) {
1156 DebugStream.println("Not ignore for robots");
1157 // Download complete
1158 downloadComplete(current_file_downloading); // synchronized
1159 }
1160 else {
1161 DebugStream.println("Ignore for robots");
1162 ignore_for_robots = false;
1163 }
1164 }
1165 // The already there line begins "File `..." However this
1166 // is only true in english, so instead I looked and there
1167 // are few (if any at all) other messages than those above
1168 // and not overwriting messages that use " `" so we'll
1169 // look for that. Note this method is not guarenteed to be
1170 // unique like the previous two.
1171 else if(line.lastIndexOf(" `") != -1) {
1172 // Not Overwriting
1173 DebugStream.println("Already there.");
1174 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
1175
1176 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
1177 downloadWarning();
1178 }
1179 // Any other important message starts with the time in the form hh:mm:ss
1180 else if(line.length() > 7) {
1181 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
1182 if(!ignore_for_robots) {
1183 DebugStream.println("Error.");
1184 downloadFailed();
1185 }
1186 else {
1187 ignore_for_robots = false;
1188 }
1189 }
1190 }
1191 }
1192 else if (max_download == DownloadJob.DEFINED_MAX) {
1193 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
1194 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
1195
1196 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
1197 progress.resetFileCount();
1198 progress.addDownload("files"); // for display: "Downloading files"
1199
1200 }
1201 else if (line.lastIndexOf("<<Done>>") != -1) {
1202 progress.increaseFileCount();
1203 }
1204 else if(line.lastIndexOf("<<Done:") != -1) {
1205 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
1206 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
1207 }
1208
1209 DebugStream.println(line);
1210 download_log.appendLine(line);
1211 }
1212 else {
1213 System.out.println("Error!!");
1214 System.exit(-1);
1215 }
1216 }
1217
1218 } catch (IOException ioe) {
1219 //message(Utility.ERROR, ioe.toString());
1220 //JTest
1221 DebugStream.printStackTrace(ioe);
1222
1223 } finally {
1224 if(Thread.currentThread().isInterrupted()) { // if the thread this class is running in is interrupted
1225 SafeProcess.log("@@@ Successfully interrupted " + Thread.currentThread().getName() + ".");
1226 }
1227
1228 SafeProcess.closeResource(br);
1229 br = null;
1230 }
1231
1232 }
1233 }
1234}
Note: See TracBrowser for help on using the repository browser.