source: main/trunk/gli/src/org/greenstone/gatherer/download/DownloadJob.java@ 31721

Last change on this file since 31721 was 31721, checked in by ak19, 7 years ago

Removed unnecessary comments. Emacs tabbing.

  • Property svn:keywords set to Author Date Id Revision
File size: 46.7 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.download;
38
39import java.awt.event.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.tree.*;
44import javax.swing.SwingUtilities;
45import org.greenstone.gatherer.Configuration;
46import org.greenstone.gatherer.DebugStream;
47import org.greenstone.gatherer.Dictionary;
48import org.greenstone.gatherer.Gatherer;
49import org.greenstone.gatherer.GAuthenticator;
50import org.greenstone.gatherer.greenstone.LocalGreenstone;
51import org.greenstone.gatherer.file.WorkspaceTree;
52import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
53import org.greenstone.gatherer.util.SafeProcess;
54import org.greenstone.gatherer.util.Utility;
55import org.greenstone.gatherer.cdm.Argument;
56import org.greenstone.gatherer.collection.*;
57
58/**
59 * @author John Thompson, Greenstone Digital Library, University of Waikato
60 * @version 2.0
61 * When modifying this class, bear in mind concurrency issues that could arise with
62 * SafeProcess's worker threads and where synchronization may be needed to prevent such issues.
63 */
64public class DownloadJob
65 implements ActionListener, SafeProcess.MainProcessHandler {
66
    /** Log document for this job. The constructor creates it in the log directory as
     *  download-&lt;mode&gt;&lt;hash of download_url&gt;.log; download output lines are appended to it. */
67 private AppendLineOnlyFileDocument download_log;
68
    /** Progress bar for this job. Its stop_start_button and close_button are the
     *  event sources handled in actionPerformed(). */
69 private DownloadProgressBar progress;
70
    /** Current and previous job state, each one of COMPLETE, PAUSED, RUNNING or STOPPED.
     *  Both are initialised to STOPPED by the constructor. */
71 private int previous_state;
72 private int state;
73
    /** The external download process. Assigned in callDownload() and reset to null there
     *  once the process has finished; stopDownload() cancels it if it is non-null. */
74 private SafeProcess prcs = null;
75
    /** The "url" argument of the download, or "host:port" for the Z3950 and SRW modes. */
76 private final String download_url;
    // NOTE(review): presumably set via setClosed() when the close button is pressed and
    // read via wasClosed() in doneCleanup() -- neither accessor is visible in this section, confirm.
77 private boolean wasClosed = false;
78
79 // private String current_url;
80 // private String destination;
    /** Proxy credentials as passed to the constructor. */
81 private final String proxy_pass;
82 private final String proxy_user;
83
84 //private final Vector encountered_urls;
85 //private Vector failed_urls;
    /** The download configuration whose assigned arguments become the perl script's command line. */
86 private Download download;
    /** The owning scroll pane; used to resume the download thread and to delete this job. */
87 private DownloadScrollPane mummy;
    /** The download's arguments, keyed by argument name (see downloadToHashMap()). */
88 private HashMap download_option;
89
    // Job states stored in state / previous_state.
90 public static final int COMPLETE = 0;
91 public static final int PAUSED = 1;
92 public static final int RUNNING = 2;
93 public static final int STOPPED = 3;
94
    // How the size of the download is known. old_callDownload() starts at UNKNOWN_MAX and
    // switches on the "<<Defined Maximum>>" / "<<Undefined Maximum>>" markers in the script's stderr.
95 public static final int UNKNOWN_MAX = 0;
96 public static final int DEFINED_MAX = 1;
97 public static final int UNDEFINED_MAX = 2;
98
99 // To prematurely terminate wget, we will need to use sockets and find a free port.
100 // We will look at a limited range of ports. This range will be reused (circular buffer)
101 private static final int PORT_BASE = 50000;
102 private static final int PORT_BLOCK_SIZE = 100;
103 private static int nextFreePort = PORT_BASE; // Keep track what port numbers we have checked for availability
104 int port; // package access. The socket port number this instance of DownloadJob will use
105 // only the main thread (where DownloadJob runs) modifies port, so no synching needed
106
    /** Download mode, passed to downloadfrom.pl as -download_mode. "Web" and "MediaWiki" use the
     *  socket-based stop mechanism; "Z3950" and "SRW" build download_url from host and port. */
107 private final String mode;
108
109 private String proxy_url; // only the main thread (where DownloadJob runs) modifies this, so no synching needed
110
111 /**
112 */
113 public DownloadJob(Download download, String proxy_pass, String proxy_user, DownloadScrollPane mummy, String mode, String proxy_url) {
114 URL url = null;
115 int folder_hash;
116
117 this.proxy_url = proxy_url;
118
119 download_option = downloadToHashMap(download);
120 if (!mode.equals("Z3950") && !mode.equals("SRW")) {
121 Argument url_arg = (Argument)download_option.get((String)"url");
122 download_url = url_arg.getValue();
123
124 }
125 else {
126 Argument host_arg = (Argument)download_option.get((String)"host");
127 Argument port_arg = (Argument)download_option.get((String)"port");
128 download_url = host_arg.getValue() + ":" +port_arg.getValue();
129 }
130
131 folder_hash = download_url.hashCode();
132 String log_filename = Utility.getLogDir(null) + "download-"+ mode + folder_hash + ".log";
133 File log_file = new File(log_filename);
134 if(log_file.exists()) {
135 log_file.delete();
136 }
137
138 File parent_log_file = log_file.getParentFile();
139 parent_log_file.mkdirs();
140 parent_log_file = null;
141 log_file = null;
142
143 this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
144
145 this.proxy_pass = proxy_pass;
146 this.proxy_user = proxy_user;
147 this.mummy = mummy;
148 this.mode = mode;
149 this.download = download;
150
151 progress = new DownloadProgressBar(this,download_url, true);
152 //encountered_urls = new Vector();
153 //failed_urls = new Vector();
154
155 previous_state = STOPPED;
156 state = STOPPED;
157 }
158
159 private HashMap downloadToHashMap(Download download)
160 {
161 HashMap download_option = new HashMap();
162 ArrayList arguments = download.getArguments(true, false);
163 for(int i = 0; i < arguments.size(); i++) {
164 Argument argument = (Argument) arguments.get(i);
165 download_option.put(argument.getName(), argument);
166 }
167 return download_option;
168 }
169
170 /** Depending on which button on the progress bar was pushed,
171 * this method will affect the state of the DownloadJob and perhaps make
172 * calls to wget.class if necessary.
173 * @param event The ActionEvent fired from within the DownloadProgressBar
174 * which we must respond to.
175 */
176 public void old_actionPerformed(ActionEvent event) {
177 // The stop_start_button is used to alternately start or stop the
178 // job. If the current state of the job is paused then this
179 // restart is logically equivalent to a resume.
180 if(event.getSource() == progress.stop_start_button) {
181 previous_state = state;
182 if (state == RUNNING) {
183 state = STOPPED;
184 } else {
185 //previous_state = state;
186 state = RUNNING;
187 mummy.resumeThread();
188 }
189 }
190 else if (event.getSource() == progress.close_button) {
191 if(state == RUNNING) {
192 previous_state = state;
193 state = STOPPED; // do we need to do anything else to stop this?
194 }
195 mummy.deleteDownloadJob(this);
196 }
197 }
198
199 /** Depending on which button on the progress bar was pushed,
200 * this method will affect the state of the DownloadJob and perhaps make
201 * calls to wget.class if necessary.
202 * @param event The ActionEvent fired from within the DownloadProgressBar
203 * which we must respond to.
204 * Now using synchronized methods like previous_state = getState(); instead of
205 * previous_state = state; and setState(STOPPED); instead of state = STOPPED;
206 */
207 public void actionPerformed(ActionEvent event) {
208 // The stop_start_button is used to alternately start or stop the
209 // job. If the current state of the job is paused then this
210 // restart is logically equivalent to a resume.
211 if(event.getSource() == progress.stop_start_button) {
212 previous_state = getState();
213 if (getState() == RUNNING) {
214 stopDownload(); // cancels any running SafeProcess, will set the current state to STOPPED when the time is right
215 } else {
216 setState(RUNNING);
217 mummy.resumeThread();
218 }
219 }
220 else if (event.getSource() == progress.close_button) {
221 setClosed();
222 SafeProcess.log("@@@ Progress bar close button pressed");
223 if(getState() == RUNNING) {
224 previous_state = getState();
225 stopDownload(); // cancels any running SafeProcess, will set the current state to STOPPED when the time is right
226 }
227 mummy.deleteDownloadJob(this);
228 }
229 }
230
231 /** Given a portnumber to check, returns true if it is available
232 * (if nothing's listening there already). */
233 public static boolean isPortAvailable(int portnum) {
234 Socket tmpSocket = null;
235 try {
236 tmpSocket = new Socket("localhost", portnum);
237 tmpSocket.close();
238 return false;
239
240 } catch(ConnectException ex){
241 // "Signals that an error occurred while attempting to connect a socket
242 // to a remote address and port. Typically, the connection was refused
243 // remotely (e.g., no process is listening on the remote address/port)."
244 System.err.println("Port " + portnum + " not yet in use.");
245 tmpSocket = null;
246 return true;
247
248 } catch(Exception ex) {
249 // includes BindException "Signals that an error occurred while attempting
250 // to bind a socket to a local address and port. Typically, the port is in
251 // use, or the requested local address could not be assigned."
252 tmpSocket = null;
253 return false;
254 }
255 }
256
257 /** Circular buffer. Modifies the value of nextFreePort (the buffer index). */
258 private void incrementNextFreePort() {
259 int offset = nextFreePort - PORT_BASE;
260 offset = (offset + 1) % PORT_BLOCK_SIZE;
261 nextFreePort = PORT_BASE + offset;
262 }
263
264 // If eschewing the use of SafeProcess, reactivate (by renaming) old_callDownload()
265 // and old_actionPerformed(), and DownloadScrollPane.java's old_deleteDownloadJob().
    /**
     * Legacy version of callDownload() that launches downloadfrom.pl with Runtime.exec()
     * instead of SafeProcess.
     * Steps, in order:
     *  1. build the perl command line from the fixed options and every assigned download argument;
     *  2. start the process, supplying proxy/GSDLHOME/GSDLOS environment variables on non-Windows
     *     systems where needed;
     *  3. for the Web and MediaWiki modes, pick a free port and write it to the process' stdin;
     *  4. read the process' stderr line by line, updating the progress bar and the download log,
     *     until the stream ends, "<<Finished>>" is read or state becomes STOPPED;
     *  5. if STOPPED, send "<<STOP>>" over a socket (Web/MediaWiki only), close stderr, destroy
     *     the process unless the socket exchange ended with a null response, and notify threads
     *     waiting on this object;
     *  6. if the state is still RUNNING, mark the job COMPLETE; finally refresh the workspace tree.
     */
266 public void old_callDownload() {
267
268 ArrayList command_list = new ArrayList();
269
270 // the following also works for client-gli if downloading is enabled (when there's a gs2build directory inside gli)
271 command_list.add(Configuration.perl_path);
272 command_list.add("-S");
273 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
274 command_list.add("-download_mode");
275 command_list.add(mode);
276 command_list.add("-cache_dir");
277 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
278 // For the purposes of prematurely terminating wget from GLI (which creates a socket
279 // as a communication channel between GLI and Perl), it is important to tell the script
280 // that we're running as GLI. Because when running from the command prompt, it should
281 // not create this socket and do the related processing.
282 command_list.add("-gli");
283
    // Append every assigned argument as "-name [value]"; flag arguments carry no value.
284 ArrayList all_arg = download.getArguments(true,false);
285 for(int i = 0; i < all_arg.size(); i++) {
286 Argument argument = (Argument) all_arg.get(i);
287 if(argument.isAssigned()) {
288 command_list.add("-" + argument.getName());
289 if(argument.getType() != Argument.FLAG) {
290 command_list.add(argument.getValue());
291 }
292 }
293 }
294
295 String [] cmd = (String []) command_list.toArray(new String[0]);
296 DebugStream.println("Download job, "+command_list);
297
    // The first flag passed to mirrorBegun() is true only when the job had previously completed.
298 if (previous_state == DownloadJob.COMPLETE) {
299 progress.mirrorBegun(true, true);
300 }
301 else {
302 progress.mirrorBegun(false, true);
303 }
304
305 try {
306 Runtime rt = Runtime.getRuntime();
307
308 String [] env = null;
309
    // Local variable: shadows the SafeProcess field of the same name for the rest of this method.
310 Process prcs = null;
311
312
313 if (Utility.isWindows()) {
314 prcs = rt.exec(cmd);
315 }
316 else {
317 if (proxy_url != null && !proxy_url.equals("")) {
318 // Specify proxies as environment variables
319 // Need to manually specify GSDLHOME and GSDLOS also
320 env = new String[4];
321 proxy_url = proxy_url.replaceAll("http://","");
322 env[0] = "http_proxy=http://"+proxy_url;
323 env[1] = "ftp_proxy=ftp://"+proxy_url;
324 env[2] = "GSDLHOME=" + Configuration.gsdl_path;
325 env[3] = "GSDLOS=" + Gatherer.client_operating_system;
326 prcs = rt.exec(cmd, env);
327 }
328 else if(Gatherer.isGsdlRemote && Gatherer.isDownloadEnabled) {
329 // Not Windows, but running client with download panel
330 // Need to manually specify GSDLHOME and GSDLOS
331 env = new String[2];
332 env[0] = "GSDLHOME=" + Configuration.gsdl_path;
333 env[1] = "GSDLOS=" + Gatherer.client_operating_system;
334 prcs = rt.exec(cmd, env);
335 }
336 else {
337 // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set
338 prcs = rt.exec(cmd);
339 }
340 }
341 //System.out.println(newcmd);
342
343 // Can use the following if debugging WgetDownload.pm - Reads debug stmts from the perl process' STDIN stream
344 //(new PerlReaderThread(prcs)).start();
345
    // NOTE(review): 'reader' wraps the process' stdout but is never read from or closed
    // anywhere in this method.
346 InputStream is = prcs.getInputStream();
347 BufferedReader reader = new BufferedReader(new InputStreamReader(is));
348
349 // To be able to stop Wget, we use sockets to communicate with the perl process that launched wget
350 if (mode.equals("Web") || mode.equals("MediaWiki")) { // wget download modes other than OAI
351
352 // Need to find an available (unused) port within the range we're looking for to pass it
353 // the Perl child process, so that it may set up a listening ServerSocket at that port number
354 try {
355 boolean foundFreePort = false;
356 for(int i = 0; i < PORT_BLOCK_SIZE; i++) {
357
358 if(isPortAvailable(nextFreePort)) {
359 foundFreePort = true;
360 break;
361
362 } else {
363 incrementNextFreePort();
364 }
365 }
366
367 if(foundFreePort) {
368 // Free port number currently found becomes the port number of the socket that this
369 // DownloadJob instance will be connecting to when the user wants to prematurely stop Wget.
370 this.port = nextFreePort;
371 incrementNextFreePort();
372
373 } else {
374 throw new Exception("Cannot find an available port in the range "
375 + PORT_BASE + "-" + (PORT_BASE+PORT_BLOCK_SIZE)
376 + "\nwhich is necessary for forcibly terminating wget.");
377 }
378
379 // Communicate the chosen port for this DownloadJob instance to the perl process, so
380 // that it can set up a ServerSocket at that port to listen for any signal to terminate wget
381 OutputStream os = prcs.getOutputStream();
382 String p = ""+this.port+"\n";
383 System.err.println("Portnumber found: " + p);
384
385 os.write(p.getBytes());
386 os.close();
387
388 } catch(Exception ex) {
    // NOTE(review): this handler also receives the "Cannot find an available port" exception
    // thrown above, in which case nothing was sent and this.port keeps its earlier value.
389 System.err.println("Sent available portnumber " + this.port + " to process' outputstream.\nBut got exception: " + ex);
390 }
391 }
392
393 BufferedReader br = new BufferedReader(new InputStreamReader(prcs.getErrorStream()));
394 // Capture the standard error stream and search for two particular occurrences.
395 String line="";
396 boolean ignore_for_robots = false;
397 int max_download = DownloadJob.UNKNOWN_MAX;
398
    // Loop ends on end of stream, on a "<<Finished>>" line, or once state has become STOPPED.
399 while ((line = br.readLine()) != null && !line.trim().equals("<<Finished>>") && state != STOPPED) {
    // Until one of the two maximum markers has been seen, lines are only scanned for those markers.
400 if ( max_download == DownloadJob.UNKNOWN_MAX) {
401 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
402 max_download = DownloadJob.DEFINED_MAX;
403 }
404 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
405 max_download = DownloadJob.UNDEFINED_MAX;
406 }
407 }
408 else if(max_download == DownloadJob.UNDEFINED_MAX) {
409 DebugStream.println(line);
410 download_log.appendLine(line);
411 // The first magic special test is to see if we've just
412 // asked for the robots.txt file. If so we ignore
413 // the next add and then the next complete/error.
414 if(line.lastIndexOf("robots.txt;") != -1) {
415 DebugStream.println("***** Requesting robot.txt");
416 ignore_for_robots = true;
417 }
418 // If line contains "=> `" display text as the
419 // currently downloading url. Unique to add download.
420 else if(line.lastIndexOf("=> `") != -1) {
421 if(!ignore_for_robots) {
422 // Add download
    // new_url is extracted but not used; the progress bar is always given the text "file".
423 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
424 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
425 }
426 }
427 // If line contains "/s) - `" set currently
428 // downloading url to "Download Complete".
429 else if(line.lastIndexOf("/s) - `") != -1) {
430 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
431 if(!ignore_for_robots) {
432 DebugStream.println("Not ignore for robots");
433 // Download complete
434 downloadComplete(current_file_downloading);
435 }
436 else {
437 DebugStream.println("Ignore for robots");
438 ignore_for_robots = false;
439 }
440 }
441 // The already there line begins "File `..." However this
442 // is only true in english, so instead I looked and there
443 // are few (if any at all) other messages than those above
444 // and not overwriting messages that use " `" so we'll
445 // look for that. Note this method is not guaranteed to be
446 // unique like the previous two.
447 else if(line.lastIndexOf(" `") != -1) {
448 // Not Overwriting
449 DebugStream.println("Already there.");
450 String new_url =
451 line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
452 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
453 downloadWarning();
454 }
455 // Any other important message starts with the time in the form hh:mm:ss
456 else if(line.length() > 7) {
457 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
458 if(!ignore_for_robots) {
459 DebugStream.println("Error.");
460 downloadFailed();
461 }
462 else {
463 ignore_for_robots = false;
464 }
465 }
466 }
467 }
468 else if (max_download == DownloadJob.DEFINED_MAX) {
    // The number is taken from between the first ':' and the first '>' of the line.
469 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
470 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
471 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
472 progress.resetFileCount();
473 progress.addDownload("files"); // for display: "Downloading files"
474 }
475 else if (line.lastIndexOf("<<Done>>") != -1) {
476 progress.increaseFileCount();
477 }
478 else if(line.lastIndexOf("<<Done:") != -1) {
479 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
480 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
481 }
482
483 DebugStream.println(line);
484 download_log.appendLine(line);
485 }
486 else {
    // NOTE(review): max_download is only ever assigned UNKNOWN_MAX, DEFINED_MAX or UNDEFINED_MAX
    // in this method, so this branch looks unreachable; if it were reached it would exit the JVM.
487 System.out.println("Error!!");
488 System.exit(-1);
489 }
490 }
491
492 if(state == STOPPED) {
493 boolean terminatePerlScript = true;
494
495 // When GLI is working with wget-based download modes other than OAI (MediaWiki and Web
496 // download) and the STOP button has been pressed, wget needs to be prematurely terminated.
497 // Only wget download modes Web and MediaWiki require the use of sockets to communicate
498 // with the perl script in order to get wget to terminate. Other download modes, including
499 // wgetdownload mode OAI, can terminate in the traditional manner: close process inputstream
500 // and kill perl process. OAI launches many wgets. So that when the perl process is terminated,
501 // the currently running wget will finish off but other wgets are no longer launched.
502 if(prcs != null && (mode.equals("Web") || mode.equals("MediaWiki"))) {
503
504 // create a socket to the perl child process and communicate the STOP message
505 Socket clientSocket = null;
    // Always true at this point: clientSocket was initialised to null on the previous line.
506 if(clientSocket == null) {
507 try {
508 clientSocket = new Socket("localhost", this.port); // connect to the port chosen for this DownloadJob instance
509
510 BufferedReader clientReader = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
511 String response = clientReader.readLine(); // see if we've been connected
512 System.err.println("Communicating with perl download script on port " + this.port
513 + "\nGot response from perl: " + response);
514
515 // Send the STOP signal
516 OutputStream os = clientSocket.getOutputStream();
517 String message = "<<STOP>>\n";
518 os.write(message.getBytes());
519 response = clientReader.readLine(); // see whether the stop signal has been received
520 System.err.println("GLI sent STOP signal to perl to terminate wget."
521 + "\nGot response from perl: " + response);
522
523 response = clientReader.readLine(); // see whether the perl script is ready to be terminated
524 System.err.println("Got another response from perl: " + response);
525 os.close();
526
527 clientReader.close();
528 clientSocket.close(); // close the clientSocket (the Perl end will close the server socket that Perl opened)
529 clientReader = null;
530 clientSocket = null;
531
    // A null third response (end of the socket stream) skips prcs.destroy() below.
    // NOTE(review): presumably this means the perl script has already ended -- confirm.
532 if(response == null) {
533 terminatePerlScript = false;
534 }
535 } catch(IOException ex) {
536 System.err.println("Tried to communicate through client socket - port " + this.port + ", but got exception: " + ex);
537 } catch(Exception ex) {
538 System.err.println("Tried to open client socket, but got exception: " + ex);
539 }
540 }
541 }
542
543 //prcs.getInputStream().close();
544 prcs.getErrorStream().close();
545 br.close();
546 br = null;
547 if(terminatePerlScript) {
548 prcs.destroy(); // This doesn't always work, but it's worth a try
549 prcs = null;
550 }
551
552 // Notify the DownloadScrollPane which is waiting on this job to complete that we are ready
553 synchronized(this) {
554 this.notify();
555 }
556 }
557 }
558 catch (Exception ioe) {
559 //message(Utility.ERROR, ioe.toString());
560 //JTest
561 DebugStream.printStackTrace(ioe);
562 }
563 // If we've got to here and the state isn't STOPPED then the
564 // job is complete.
565 if(state == DownloadJob.RUNNING) {
566 progress.mirrorComplete();
567 previous_state = state;
568 state = DownloadJob.COMPLETE;
569 }
570 // refresh the workspace tree
571 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
572 }
573
574 public void callDownload() {
575
576 ArrayList command_list= new ArrayList();
577
578 // the following also works for client-gli if downloading is enabled (when there's a gs2build directory inside gli)
579 command_list.add(Configuration.perl_path);
580 command_list.add("-S");
581 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
582 command_list.add("-download_mode");
583 command_list.add(mode);
584 command_list.add("-cache_dir");
585 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
586 // For the purposes of prematurely terminating wget from GLI (which creates a socket
587 // as a communication channel between GLI and Perl), it is important to tell the script
588 // that we're running as GLI. Because when running from the command prompt, it should
589 // not create this socket and do the related processing.
590 command_list.add("-gli");
591
592 ArrayList all_arg = download.getArguments(true,false);
593 for(int i = 0; i < all_arg.size(); i++) {
594 Argument argument = (Argument) all_arg.get(i);
595 if(argument.isAssigned()) {
596 command_list.add("-" + argument.getName());
597 if(argument.getType() != Argument.FLAG) {
598 command_list.add(argument.getValue());
599 }
600 }
601 }
602
603 String [] cmd = (String []) command_list.toArray(new String[0]);
604 DebugStream.println("Download job, "+command_list);
605
606 if (previous_state == DownloadJob.COMPLETE) {
607 progress.mirrorBegun(true, true);
608 }
609 else {
610 progress.mirrorBegun(false, true);
611 }
612
613 try {
614 Runtime rt = Runtime.getRuntime();
615
616 String [] env = null;
617
618 if (Utility.isWindows()) {
619 prcs = new SafeProcess(cmd);
620 }
621 else {
622 if (proxy_url != null && !proxy_url.equals("")) {
623 // Specify proxies as environment variables
624 // Need to manually specify GSDLHOME and GSDLOS also
625 env = new String[4];
626 proxy_url = proxy_url.replaceAll("http://","");
627 env[0] = "http_proxy=http://"+proxy_url;
628 env[1] = "ftp_proxy=ftp://"+proxy_url;
629 env[2] = "GSDLHOME=" + Configuration.gsdl_path;
630 env[3] = "GSDLOS=" + Gatherer.client_operating_system;
631
632 prcs = new SafeProcess(cmd, env, null);
633 }
634 else if(Gatherer.isGsdlRemote && Gatherer.isDownloadEnabled) {
635 // Not Windows, but running client with download panel
636 // Need to manually specify GSDLHOME and GSDLOS
637 env = new String[2];
638 env[0] = "GSDLHOME=" + Configuration.gsdl_path;
639 env[1] = "GSDLOS=" + Gatherer.client_operating_system;
640
641 prcs = new SafeProcess(cmd, env, null);
642 }
643 else {
644 // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set
645 prcs = new SafeProcess(cmd);
646 }
647 }
648 //System.out.println(newcmd);
649 prcs.setMainHandler(this); // attach handler to clean up before and after process.destroy()
650 // for which DownloadJob implements SafeProcess.MainProcessHandler
651
652 // To be able to stop Wget, we use sockets to communicate with the perl process that launched wget
653 if (mode.equals("Web") || mode.equals("MediaWiki")) { // wget download modes other than OAI
654
655 // Need to find an available (unused) port within the range we're looking for to pass it
656 // the Perl child process, so that it may set up a listening ServerSocket at that port number
657 try {
658 boolean foundFreePort = false;
659 for(int i = 0; i < PORT_BLOCK_SIZE; i++) {
660
661 if(isPortAvailable(nextFreePort)) {
662 foundFreePort = true;
663 break;
664
665 } else {
666 incrementNextFreePort();
667 }
668 }
669
670 if(foundFreePort) {
671 // Free port number currently found becomes the port number of the socket that this
672 // DownloadJob instance will be connecting to when the user wants to prematurely stop Wget.
673 this.port = nextFreePort;
674 incrementNextFreePort(); //// Necessary?
675
676 } else {
677 throw new Exception("Cannot find an available port in the range "
678 + PORT_BASE + "-" + (PORT_BASE+PORT_BLOCK_SIZE)
679 + "\nwhich is necessary for forcibly terminating wget.");
680 }
681
682 // Communicate the chosen port for this DownloadJob instance to the perl process, so
683 // that it can set up a ServerSocket at that port to listen for any signal to terminate wget
684 //OutputStream os = prcs.getOutputStream();
685 String p = ""+this.port+"\n";
686 System.err.println("Portnumber found: " + p);
687
688 prcs.setInputString(p);
689
690 } catch(Exception ex) {
691 System.err.println("Sent available portnumber " + this.port + " to process' outputstream.\nBut got exception: " + ex);
692 }
693 }
694
695 ProcessErrHandler errHandler = new ProcessErrHandler(); // meaningful output comes from prcs stderr
696 ProcessOutHandler outHandler = new ProcessOutHandler(); // debugging output comes from prcs' stdout
697
698 int exitVal = prcs.runProcess(null, outHandler, errHandler);
699
700 // if prcs is interrupted (cancelled) during the blocking runProcess() call,
701 // as happens on state == STOPPED, then
702 // beforeWaitingForStreamsToEnd() is called before the process' worker threads come to a halt
703 // and afterStreamsEnded() is called when the process' worker threads have halted,
704 // beforeProcessDestroy() is called before the process is destroyed,
705 // and afterProcessDestroy() is called after the proc has been destroyed.
706 // If when beforeWaitingForStreamsEnd() stage the perl was still running but had been
707 // told to stop, then the beforeWaitingForStreamsEnd() method will make sure to communicate
708 // with the perl process over a socket and send it the termination message,
709 // which will also kill any runnning wget that perl launched.
710 // In that case, destroy() is actually called on the process at last.
711
712 }
713 catch (Exception ioe) {
714 SafeProcess.log(ioe);
715 DebugStream.printStackTrace(ioe);
716 }
717
718 // now the process is done, we can at last null it
719 prcs = null;
720
721 // If we've got to here and the state isn't STOPPED then the
722 // job is complete.
723 if(getState() == DownloadJob.RUNNING) {
724 progress.mirrorComplete();
725 previous_state = getState();
726 setState(DownloadJob.COMPLETE);
727 }
728
729 SafeProcess.log("@@@@ DONE callDownload()");
730
731 // refresh the workspace tree
732 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
733 }
734
735 private synchronized boolean isStopped() { return state == STOPPED; }
736
737 // called when the user cancelled the download and we're told to stop both our external perl process
738 // and the wget process that it in turn launched
739 public void stopDownload() {
740 if(prcs != null) {
741 SafeProcess.log("@@@ Going to interrupt the SafeProcess...");
742
743 // Whether a process ends naturally or is prematurely ended, beforeWaitingForStreamsToEnd()
744 // will be called. We've hooked this in to calling tellPerlToTerminateWget() only if the
745 // process is still running when cancel is pressed, but not when it's naturally terminated.
746 boolean hadToSendInterrupt = prcs.cancelRunningProcess(); // returns false if it was already terminating/terminated, true if interrupt sent
747
748 } else {
749 System.err.println("@@@@ No process to interrupt");
750 }
751
752 //setState(STOPPED); // would set it to stop on cancel, even if it already naturally terminated
753
754 }
755
756//*********** START of implementing interface SafeProcess.MainProcessHandler
757 // before and after processDestroy only happen when interrupted AND terminatePerlScript=true
758 public void beforeProcessDestroy() {}
759 public void afterProcessDestroy() {}
760
761 // after blocking call on closing up streamgobbler worker threads that happens
762 // upon natural termination or interruption of process' main body/thread.
763 // if not overriding, then return the parameter forciblyTerminating as-is
764 public boolean afterStreamsEnded(boolean forciblyTerminating) { return forciblyTerminating; }
765
766 // called after the SafeProcess has fully terminated (naturally or via process.destroy())
767 // and has been cleaned up
768 public void doneCleanup(boolean wasForciblyTerminated) {
769 // let the user know they can cancel again now cleanup phase is done
770 progress.enableCancelJob(true);
771
772 if(wasForciblyTerminated) {
773 setState(STOPPED); // sets it to stop only if process truly was prematurely terminated, not merely
774 // if the cancel button was clicked when it had already naturally terminated
775
776 // If the user had pressed the Close button to terminate the running job, then
777 // we're now ready to remove the display of the until now running job
778 // from the download progress bar interface
779 // But don't bother removing the progress bar if the user had only pressed the Stop button
780 if(wasClosed()) {
781 mummy.deleteCurrentDownloadJob(this);
782 }
783 }
784 }
785
786 // before blocking call of ending streamgobbler worker threads that happens
787 // after process' main body/thread has naturally terminated or been interrupted
788 public boolean beforeWaitingForStreamsToEnd(boolean forciblyTerminating) {
789 // let the user know they can't cancel during cleanup phase
790 progress.enableCancelJob(false);
791
792 SafeProcess.log("**** in beforeWaitingForStreamsToEnd()");
793
794 // state would not be STOPPED if cancel was pressed after the process naturally terminated anyway
795 // in that case we don't need to send perl the signal to terminate WGET
796 if(!forciblyTerminating) { //if(!isStopped()) {
797 SafeProcess.log("*** Process not (yet) cancelled/state not (yet) stopped");
798 SafeProcess.log("*** But process has naturally terminated (process streams are being closed before any interruption signal can be received), so won't be destroying process even on interrupt");
799 return false; // for us to be in this method at all with forciblyTerminating being false
800 // means the process is already naturally terminating, so don't unnaturally destroy it
801 }
802
803 // else the process is still running and we've been told to stop, so tell perl to stop wget first
804 // (so that process destroy can then be called thereafter)
805 return tellPerlToTerminateWget();
806 }
//*********** END of implementing interface SafeProcess.MainProcessHandler
808
809 public boolean tellPerlToTerminateWget() {
810 SafeProcess.log("**** in tellPerlToTerminateWget()");
811
812 boolean terminatePerlScript = true;
813
814 // When GLI is working with wget-based download modes other than OAI (MediaWiki and Web
815 // download) and the STOP button has been pressed, wget needs to be prematurely terminated.
816 // Only wget download modes Web and MediaWiki require the use of sockets to communicate
817 // with the perl script in order to get wget to terminate. Other download modes, including
818 // wgetdownload mode OAI, can terminate in the traditional manner: close process inputstream
819 // and kill perl process. OAI launches many wgets. So that when the perl process is terminated,
820 // the currently running wget will finish off but other wgets are no longer launched.
821 if((mode.equals("Web") || mode.equals("MediaWiki"))) {
822 SafeProcess.log("@@@ Socket communication to end wget");
823 // create a socket to the perl child process and communicate the STOP message
824 Socket clientSocket = null;
825 BufferedReader clientReader = null;
826 OutputStream os = null;
827
828 if(clientSocket == null) {
829 try {
830 clientSocket = new Socket("localhost", this.port); // connect to the port chosen for this DownloadJob instance
831
832 clientReader = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
833 String response = clientReader.readLine(); // see if we've been connected
834 System.err.println("Communicating with perl download script on port " + this.port
835 + "\nGot response from perl: " + response);
836
837 // Send the STOP signal
838 os = clientSocket.getOutputStream();
839 String message = "<<STOP>>\n";
840 os.write(message.getBytes());
841 response = clientReader.readLine(); // see whether the stop signal has been received
842 System.err.println("GLI sent STOP signal to perl to terminate wget."
843 + "\nGot response from perl: " + response);
844
845 response = clientReader.readLine(); // see whether the perl script is ready to be terminated
846 System.err.println("Got another response from perl: " + response);
847
848 if(response == null) { // why? Is it because the process has already terminated naturally if response is null?
849 terminatePerlScript = false;
850 }
851 } catch(IOException ex) {
852 if(ex instanceof IOException && ex.getMessage().indexOf("Connection refused") != -1) {
853 terminatePerlScript = false; // no socket listening on other end because process ended
854 System.err.println("Tried to communicate through client socket - port " + this.port + ", but the process seems to have already ended naturally");
855 } else {
856 System.err.println("Tried to communicate through client socket - port " + this.port + ", but got exception: " + ex);
857 }
858
859 } catch(Exception ex) {
860 System.err.println("Tried to open client socket, but got exception: " + ex);
861 } finally {
862 SafeProcess.closeResource(os);
863 SafeProcess.closeResource(clientReader);
864 SafeProcess.closeSocket(clientSocket); // close the clientSocket (the Perl end will close the server socket that Perl opened)
865 os = null;
866 clientReader = null;
867 clientSocket = null;
868 }
869 }
870 }
871
872 return terminatePerlScript; // if true, it will call destroy() on the SafeProcess' process
873 }
874
875
876 /** Called by the WGet native code when the current download is
877 * completed. In turn all download listeners are informed.
878 */
879 public void downloadComplete() {
880 progress.downloadComplete(); // now this is synchronized
881 }
882
883
884 public void downloadComplete(String current_file_downloading)
885 {
886 progress.downloadComplete(); // now this is synchronized
887 DebugStream.println("Download complete: " + current_file_downloading);
888 }
889
890
891 /** Called by the WGet native code when the requested download returns
892 * a status code other than 200.
893 */
894 public void downloadFailed() {
895 // TODO!!
896 //synchronized(failed_urls) {
897 //failed_urls.add(current_url); // It is the current url that failed
898 //}
899 progress.downloadFailed(); // now this is synchronized
900 //DebugStream.println("Download failed: " + current_url);
901 }
902
903 /**
904 */
905 public void downloadWarning() {
906 progress.downloadWarning(); // now this is synchronized
907 }
908
909 public AppendLineOnlyFileDocument getLogDocument() {
910 return download_log;
911 }
912
913 /**
914 * @return Returns the progress bar associated with this job.
915 */
916 public DownloadProgressBar getProgressBar() {
917 return progress;
918 }
919
920 /** Called to discover if the user wanted this thread to run or if
921 * it is paused.
922 * @return An int representing the current DownloadJob state.
923 */
924 public synchronized int getState() {
925 return state;
926 }
927
928 /** @return true if the close button of the DownloadProgressBar was pressed,
929 * false otherwise such as if the Stop button had been pressed.
930 */
931 private synchronized boolean wasClosed() {
932 return this.wasClosed;
933 }
934
935 /** Returns the current state of the stop flag for this job.
936 * @return A boolean representing whether the user has requested to
937 * stop.
938 */
939 public synchronized boolean hasSignalledStop() {
940 if(state == DownloadJob.STOPPED || state == DownloadJob.PAUSED ||
941 state == DownloadJob.COMPLETE) {
942 return true;
943 }
944 return false;
945 }
946
947 public synchronized void setState(int state) {
948 previous_state = this.state;
949 this.state = state;
950 }
951
952 private synchronized void setClosed() {
953 this.wasClosed = true;
954 }
955
956 /** A convenience call.
957 * @return A String representing the url of the initial url (root node of the mirrored tree).
958 */
959 public String toString() {
960 return download_url;
961 }
962
963 /** Called by the WGet native code to signal the current progress of
964 * downloading.
965 * @param current A long representing the number of bytes that have
966 * been downloaded since the last update.
967 * @param expected A long representing the total number of bytes
968 * expected for this download.
969 */
970 public void updateProgress(long current, long expected) {
971 progress.updateProgress(current, expected);
972 }
973
974
975 /*
976 Go through https://docs.oracle.com/javase/tutorial/essential/concurrency/atomicvars.html series of
977 Java articles on concurrency again.
978 Go through http://docs.oracle.com/javase/tutorial/uiswing/concurrency/
979
980 http://stackoverflow.com/questions/574240/is-there-an-advantage-to-use-a-synchronized-method-instead-of-a-synchronized-blo
981
982 "Not only do synchronized methods not lock the whole class, but they don't lock the whole instance either. Unsynchronized methods in the class may still proceed on the instance."
983 "Only the syncronized methods are locked. If there are fields you use within synced methods that are accessed by unsynced methods, you can run into race conditions."
984
985 "synchronizing on "this" is considered in some circles to be an anti-pattern. The unintended consequence is that outside of the class someone can lock on an object reference that is equal to "this" and prevent other threads from passing the barriers within the class potentially creating a deadlock situation. Creating a "private final Object = new Object();" variable purely for locking purposes is the often used solution. Here's another question relating directly to this issue. http://stackoverflow.com/questions/442564/avoid-synchronizedthis-in-java?lq=1"
986
987 "A private lock is a defensive mechanism, which is never a bad idea.
988
989 Also, as you alluded to, private locks can control granularity. One set of operations on an object might be totally unrelated to another but synchronized(this) will mutually exclude access to all of them."
990
991 http://stackoverflow.com/questions/8393883/is-synchronized-keyword-exception-safe
992 "In any scoped thread-safe block, the moment you get out of it, the thread-safety is gone."
993 "In case of an exception the lock will be released."
994
995 http://stackoverflow.com/questions/8259479/should-i-synchronize-listener-notifications-or-not
996 "Use a CopyOnWriteArrayList for your listener arrays."
997 "If you use the CopyOnWriteArrayList, then you don't have to synchronize when iterating."
998 "CopyOnWriteArrayList is thread-safe, so there is no need to synchronize."
999
1000 "Use a ConcurrentLinkedQueue<Listener> ... for this kind of problems: adding, removing and iterating simultaneously on a collection.
1001 A precision : this solution prevents a listener from being called from the very moment it is deregistered."
1002 "It means that you start iterating, an element is added, it will be called, another is removed, it won't, all this in the same iteration cycle.
1003 It's the best of both world: ensuring synchronization, while being fine grained on who gets called and who's not."
1004
1005 http://stackoverflow.com/questions/8260205/when-a-listener-is-removed-is-it-okay-that-the-event-be-called-on-that-listener
1006
1007 http://stackoverflow.com/questions/2282166/java-synchronizing-on-primitives
1008
1009 1. You can't lock on a primitive and
1010 2. Don't lock on a Long unless you're careful how you construct them. Long values created by autoboxing or Long.valueOf() in a certain range are guaranteed to be the same across the JVM which means other threads could be locking on the same exact Long object and giving you cross-talk. This can be a subtle concurrency bug (similar to locking on intern'ed strings).
1011
1012 Cross-talk:
1013 "In electronics, crosstalk is any phenomenon by which a signal transmitted on one circuit or channel of a transmission system creates an undesired effect in another circuit or channel. Crosstalk is usually caused by undesired capacitive, inductive, or conductive coupling from one circuit, part of a circuit, or channel, to another."
1014 */
1015
1016
1017 // Inner thread class that reads from process downloadfrom.pl's std output stream
1018 private class ProcessOutHandler extends SafeProcess.CustomProcessHandler {
1019
1020 public ProcessOutHandler() {
1021 super(SafeProcess.STDOUT);
1022 }
1023
1024 public void run(Closeable stream) {
1025 InputStream is = (InputStream) stream;
1026 BufferedReader eReader = null;
1027 try {
1028
1029 String message = null;
1030 eReader = new BufferedReader(new InputStreamReader(is));
1031 while(!Thread.currentThread().isInterrupted() && (message = eReader.readLine()) != null) {
1032 if(!message.equals("\n")) {
1033 System.err.println("**** Perl STDOUT: " + message);
1034 }
1035 }
1036 if(Thread.currentThread().isInterrupted()) {
1037 System.err.println("**** Perl INTERRUPTed.");
1038 } else {
1039 System.err.println("**** Perl ENDed.");
1040 }
1041
1042 } catch(Exception e) {
1043 System.err.println("Thread - caught exception: " + e);
1044 } finally {
1045 if(Thread.currentThread().isInterrupted()) {
1046 SafeProcess.log("@@@ Successfully interrupted " + Thread.currentThread().getName() + ".");
1047 }
1048 SafeProcess.closeResource(eReader);
1049 eReader = null;
1050 }
1051 }
1052 }
1053
1054
1055 private class ProcessErrHandler extends SafeProcess.CustomProcessHandler {
1056
1057 public ProcessErrHandler() {
1058 super(SafeProcess.STDERR);
1059 }
1060
1061 public void run(Closeable stream) {
1062 InputStream eis = (InputStream) stream;
1063
1064 BufferedReader br = null;
1065 try {
1066 br = new BufferedReader(new InputStreamReader(eis));
1067
1068 // Capture the standard error stream and search for two particular occurrences.
1069 String line="";
1070 boolean ignore_for_robots = false;
1071 int max_download = DownloadJob.UNKNOWN_MAX;
1072
1073 // handle to outer class objects that need synchronization (on either objects or their methods)
1074 DownloadProgressBar progress = DownloadJob.this.progress;
1075 AppendLineOnlyFileDocument download_log = DownloadJob.this.download_log;
1076
1077 while (!Thread.currentThread().isInterrupted() && (line = br.readLine()) != null
1078 && !line.trim().equals("<<Finished>>") /*&& !isStopped()*/) {
1079 if (max_download == DownloadJob.UNKNOWN_MAX) {
1080 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
1081 max_download = DownloadJob.DEFINED_MAX;
1082 }
1083 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
1084 max_download = DownloadJob.UNDEFINED_MAX;
1085 }
1086 }
1087 else if(max_download == DownloadJob.UNDEFINED_MAX) {
1088 DebugStream.println(line);
1089 download_log.appendLine(line); // now synchronized
1090 // The first magic special test is to see if we've just
1091 // asked for the robots.txt file. If so we ignore
1092 // the next add and then the next complete/error.
1093 if(line.lastIndexOf("robots.txt;") != -1) {
1094 DebugStream.println("***** Requesting robot.txt");
1095 ignore_for_robots = true;
1096 }
1097 // If line contains "=> `" display text as the
1098 // currently downloading url. Unique to add download.
1099 else if(line.lastIndexOf("=> `") != -1) {
1100 if(!ignore_for_robots) {
1101 // Add download
1102 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
1103
1104 // now synchronized
1105 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
1106 }
1107 }
1108 // If line contains "/s) - `" set currently
1109 // downloading url to "Download Complete".
1110 else if(line.lastIndexOf("/s) - `") != -1) {
1111 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
1112 if(!ignore_for_robots) {
1113 DebugStream.println("Not ignore for robots");
1114 // Download complete
1115 downloadComplete(current_file_downloading); // synchronized
1116 }
1117 else {
1118 DebugStream.println("Ignore for robots");
1119 ignore_for_robots = false;
1120 }
1121 }
1122 // The already there line begins "File `..." However this
1123 // is only true in english, so instead I looked and there
1124 // are few (if any at all) other messages than those above
1125 // and not overwriting messages that use " `" so we'll
1126 // look for that. Note this method is not guarenteed to be
1127 // unique like the previous two.
1128 else if(line.lastIndexOf(" `") != -1) {
1129 // Not Overwriting
1130 DebugStream.println("Already there.");
1131 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
1132
1133 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
1134 downloadWarning();
1135 }
1136 // Any other important message starts with the time in the form hh:mm:ss
1137 else if(line.length() > 7) {
1138 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
1139 if(!ignore_for_robots) {
1140 DebugStream.println("Error.");
1141 downloadFailed();
1142 }
1143 else {
1144 ignore_for_robots = false;
1145 }
1146 }
1147 }
1148 }
1149 else if (max_download == DownloadJob.DEFINED_MAX) {
1150 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
1151 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
1152
1153 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
1154 progress.resetFileCount();
1155 progress.addDownload("files"); // for display: "Downloading files"
1156
1157 }
1158 else if (line.lastIndexOf("<<Done>>") != -1) {
1159 progress.increaseFileCount();
1160 }
1161 else if(line.lastIndexOf("<<Done:") != -1) {
1162 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
1163 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
1164 }
1165
1166 DebugStream.println(line);
1167 download_log.appendLine(line);
1168 }
1169 else {
1170 System.out.println("Error!!");
1171 System.exit(-1);
1172 }
1173 }
1174
1175 } catch (IOException ioe) {
1176 //message(Utility.ERROR, ioe.toString());
1177 //JTest
1178 DebugStream.printStackTrace(ioe);
1179
1180 } finally {
1181 if(Thread.currentThread().isInterrupted()) { // if the thread this class is running in is interrupted
1182 SafeProcess.log("@@@ Successfully interrupted " + Thread.currentThread().getName() + ".");
1183 }
1184
1185 SafeProcess.closeResource(br);
1186 br = null;
1187 }
1188
1189 }
1190 }
1191}
Note: See TracBrowser for help on using the repository browser.