source: main/trunk/gli/src/org/greenstone/gatherer/download/DownloadJob.java@ 31831

Last change on this file since 31831 was 31831, checked in by ak19, 7 years ago

Part of previous commit. GLI needs to pass WGETRC env var down to perl running wget

  • Property svn:keywords set to Author Date Id Revision
File size: 47.9 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.download;
38
39import java.awt.event.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.tree.*;
44import javax.swing.SwingUtilities;
45import org.greenstone.gatherer.Configuration;
46import org.greenstone.gatherer.DebugStream;
47import org.greenstone.gatherer.Dictionary;
48import org.greenstone.gatherer.Gatherer;
49import org.greenstone.gatherer.GAuthenticator;
50import org.greenstone.gatherer.greenstone.LocalGreenstone;
51import org.greenstone.gatherer.file.WorkspaceTree;
52import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
53import org.greenstone.gatherer.util.SafeProcess;
54import org.greenstone.gatherer.util.Utility;
55import org.greenstone.gatherer.cdm.Argument;
56import org.greenstone.gatherer.collection.*;
57
58/**
59 * @author John Thompson, Greenstone Digital Library, University of Waikato
60 * @version 2.0
61 * When modifying this class, bear in mind concurrency issues that could arise with
62 * SafeProcess's worker threads and where synchronization may be needed to prevent such issues.
63 */
64public class DownloadJob
65 implements ActionListener, SafeProcess.MainProcessHandler {
66
67 private AppendLineOnlyFileDocument download_log;
68
69 private DownloadProgressBar progress;
70
71 private int previous_state;
72 private int state;
73
74 private SafeProcess prcs = null;
75
76 private final String download_url;
77 private boolean wasClosed = false;
78
79 // private String current_url;
80 // private String destination;
81 private final String proxy_pass;
82 private final String proxy_user;
83
84 //private final Vector encountered_urls;
85 //private Vector failed_urls;
86 private Download download;
87 private DownloadScrollPane mummy;
88 private HashMap download_option;
89
90 public static final int COMPLETE = 0;
91 public static final int PAUSED = 1;
92 public static final int RUNNING = 2;
93 public static final int STOPPED = 3;
94
95 public static final int UNKNOWN_MAX = 0;
96 public static final int DEFINED_MAX = 1;
97 public static final int UNDEFINED_MAX = 2;
98
99 // To prematurely terminate wget, we will need to use sockets and find a free port.
100 // We will look at a limited range of ports. This range will be reused (circular buffer)
101 private static final int PORT_BASE = 50000;
102 private static final int PORT_BLOCK_SIZE = 100;
103 private static int nextFreePort = PORT_BASE; // Keep track what port numbers we have checked for availability
104 int port; // package access. The socket port number this instance of DownloadJob will use
105 // only the main thread (where DownloadJob runs) modifies port, so no synching needed
106
107 private final String mode;
108
109 private String proxy_url; // only the main thread (where DownloadJob runs) modifies this, so no synching needed
110
111 /**
112 */
113 public DownloadJob(Download download, String proxy_pass, String proxy_user, DownloadScrollPane mummy, String mode, String proxy_url) {
114 URL url = null;
115 int folder_hash;
116
117 this.proxy_url = proxy_url;
118
119 download_option = downloadToHashMap(download);
120 if (!mode.equals("Z3950") && !mode.equals("SRW")) {
121 Argument url_arg = (Argument)download_option.get((String)"url");
122 download_url = url_arg.getValue();
123
124 }
125 else {
126 Argument host_arg = (Argument)download_option.get((String)"host");
127 Argument port_arg = (Argument)download_option.get((String)"port");
128 download_url = host_arg.getValue() + ":" +port_arg.getValue();
129 }
130
131 folder_hash = download_url.hashCode();
132 String log_filename = Utility.getLogDir(null) + "download-"+ mode + folder_hash + ".log";
133 File log_file = new File(log_filename);
134 if(log_file.exists()) {
135 log_file.delete();
136 }
137
138 File parent_log_file = log_file.getParentFile();
139 parent_log_file.mkdirs();
140 parent_log_file = null;
141 log_file = null;
142
143 this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
144
145 this.proxy_pass = proxy_pass;
146 this.proxy_user = proxy_user;
147 this.mummy = mummy;
148 this.mode = mode;
149 this.download = download;
150
151 progress = new DownloadProgressBar(this,download_url, true);
152 //encountered_urls = new Vector();
153 //failed_urls = new Vector();
154
155 previous_state = STOPPED;
156 state = STOPPED;
157 }
158
159 private HashMap downloadToHashMap(Download download)
160 {
161 HashMap download_option = new HashMap();
162 ArrayList arguments = download.getArguments(true, false);
163 for(int i = 0; i < arguments.size(); i++) {
164 Argument argument = (Argument) arguments.get(i);
165 download_option.put(argument.getName(), argument);
166 }
167 return download_option;
168 }
169
170 /** Depending on which button on the progress bar was pushed,
171 * this method will affect the state of the DownloadJob and perhaps make
172 * calls to wget.class if necessary.
173 * @param event The ActionEvent fired from within the DownloadProgressBar
174 * which we must respond to.
175 */
176 public void old_actionPerformed(ActionEvent event) {
177 // The stop_start_button is used to alternately start or stop the
178 // job. If the current state of the job is paused then this
179 // restart is logically equivalent to a resume.
180 if(event.getSource() == progress.stop_start_button) {
181 previous_state = state;
182 if (state == RUNNING) {
183 state = STOPPED;
184 } else {
185 //previous_state = state;
186 state = RUNNING;
187 mummy.resumeThread();
188 }
189 }
190 else if (event.getSource() == progress.close_button) {
191 if(state == RUNNING) {
192 previous_state = state;
193 state = STOPPED; // do we need to do anything else to stop this?
194 }
195 mummy.deleteDownloadJob(this);
196 }
197 }
198
199 /** Depending on which button on the progress bar was pushed,
200 * this method will affect the state of the DownloadJob and perhaps make
201 * calls to wget.class if necessary.
202 * @param event The ActionEvent fired from within the DownloadProgressBar
203 * which we must respond to.
204 * Now using synchronized methods like previous_state = getState(); instead of
205 * previous_state = state; and setState(STOPPED); instead of state = STOPPED;
206 */
207 public void actionPerformed(ActionEvent event) {
208 // The stop_start_button is used to alternately start or stop the
209 // job. If the current state of the job is paused then this
210 // restart is logically equivalent to a resume.
211 if(event.getSource() == progress.stop_start_button) {
212 previous_state = getState();
213 if (getState() == RUNNING) {
214 stopDownload(); // cancels any running SafeProcess, will set the current state to STOPPED when the time is right
215 } else {
216 setState(RUNNING);
217 mummy.resumeThread();
218 }
219 }
220 else if (event.getSource() == progress.close_button) {
221 setClosed();
222 SafeProcess.log("@@@ Progress bar close button pressed");
223 if(getState() == RUNNING) {
224 previous_state = getState();
225 stopDownload(); // cancels any running SafeProcess, will set the current state to STOPPED when the time is right
226 }
227 mummy.deleteDownloadJob(this);
228 }
229 }
230
231 /** Given a portnumber to check, returns true if it is available
232 * (if nothing's listening there already). */
233 public static boolean isPortAvailable(int portnum) {
234 Socket tmpSocket = null;
235 try {
236 tmpSocket = new Socket("localhost", portnum);
237 tmpSocket.close();
238 return false;
239
240 } catch(ConnectException ex){
241 // "Signals that an error occurred while attempting to connect a socket
242 // to a remote address and port. Typically, the connection was refused
243 // remotely (e.g., no process is listening on the remote address/port)."
244 System.err.println("Port " + portnum + " not yet in use.");
245 tmpSocket = null;
246 return true;
247
248 } catch(Exception ex) {
249 // includes BindException "Signals that an error occurred while attempting
250 // to bind a socket to a local address and port. Typically, the port is in
251 // use, or the requested local address could not be assigned."
252 tmpSocket = null;
253 return false;
254 }
255 }
256
257 /** Circular buffer. Modifies the value of nextFreePort (the buffer index). */
258 private void incrementNextFreePort() {
259 int offset = nextFreePort - PORT_BASE;
260 offset = (offset + 1) % PORT_BLOCK_SIZE;
261 nextFreePort = PORT_BASE + offset;
262 }
263
264 // If eschewing the use of SafeProcess, reactivate (by renaming) old_callDownload()
265 // and old_actionPerformed(), and DownloadScrollPane.java's old_deleteDownloadJob().
266 public void old_callDownload() {
267
268 ArrayList command_list = new ArrayList();
269
270 // the following also works for client-gli if downloading is enabled (when there's a gs2build directory inside gli)
271 command_list.add(Configuration.perl_path);
272 command_list.add("-S");
273 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
274 command_list.add("-download_mode");
275 command_list.add(mode);
276 command_list.add("-cache_dir");
277 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
278 // For the purposes of prematurely terminating wget from GLI (which creates a socket
279 // as a communication channel between GLI and Perl), it is important to tell the script
280 // that we're running as GLI. Because when running from the command prompt, it should
281 // not create this socket and do the related processing.
282 command_list.add("-gli");
283
284 ArrayList all_arg = download.getArguments(true,false);
285 for(int i = 0; i < all_arg.size(); i++) {
286 Argument argument = (Argument) all_arg.get(i);
287 if(argument.isAssigned()) {
288 command_list.add("-" + argument.getName());
289 if(argument.getType() != Argument.FLAG) {
290 command_list.add(argument.getValue());
291 }
292 }
293 }
294
295 String [] cmd = (String []) command_list.toArray(new String[0]);
296 DebugStream.println("Download job, "+command_list);
297
298 if (previous_state == DownloadJob.COMPLETE) {
299 progress.mirrorBegun(true, true);
300 }
301 else {
302 progress.mirrorBegun(false, true);
303 }
304
305 try {
306 Runtime rt = Runtime.getRuntime();
307
308 String [] env = null;
309
310 Process prcs = null;
311
312
313 if (Utility.isWindows()) {
314 prcs = rt.exec(cmd);
315 }
316 else {
317 if (proxy_url != null && !proxy_url.equals("")) {
318 // Specify proxies as environment variables
319 // Need to manually specify GSDLHOME and GSDLOS also
320 env = new String[6];
321 proxy_url = proxy_url.replaceAll("http://","");
322 env[0] = "http_proxy=http://"+proxy_url;
323 env[1] = "https_proxy=http://"+proxy_url; // HTTP protocol for https:// too
324 // see also https://wiki.archlinux.org/index.php/proxy_settings
325 env[2] = "ftp_proxy=ftp://"+proxy_url;
326 env[3] = "GSDLHOME=" + Configuration.gsdl_path;
327 env[4] = "GSDLOS=" + Gatherer.client_operating_system;
328 env[5] = "WGETRC=" + LocalGreenstone.getBinOSDirectoryPath(Gatherer.client_operating_system)+"wgetrc"; // teach it where the wgetrc file lives, in gs2build/bin/<os>
329 prcs = rt.exec(cmd, env);
330 }
331 else if(Gatherer.isGsdlRemote && Gatherer.isDownloadEnabled) {
332 // Not Windows, but running client with download panel
333 // Need to manually specify GSDLHOME and GSDLOS
334 env = new String[3];
335 env[0] = "GSDLHOME=" + Configuration.gsdl_path;
336 env[1] = "GSDLOS=" + Gatherer.client_operating_system;
337 env[2] = "WGETRC=" + LocalGreenstone.getBinOSDirectoryPath(Gatherer.client_operating_system)+"wgetrc"; // teach it where the wgetrc file lives, in gs2build/bin/<os>
338 prcs = rt.exec(cmd, env);
339 }
340 else {
341 // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set
342 prcs = rt.exec(cmd);
343 }
344 }
345 //System.out.println(newcmd);
346
347 // Can use the following if debugging WgetDownload.pm - Reads debug stmts from the perl process' STDIN stream
348 //(new PerlReaderThread(prcs)).start();
349
350 InputStream is = prcs.getInputStream();
351 BufferedReader reader = new BufferedReader(new InputStreamReader(is));
352
353 // To be able to stop Wget, we use sockets to communicate with the perl process that launched wget
354 if (mode.equals("Web") || mode.equals("MediaWiki")) { // wget download modes other than OAI
355
356 // Need to find an available (unused) port within the range we're looking for to pass it
357 // the Perl child process, so that it may set up a listening ServerSocket at that port number
358 try {
359 boolean foundFreePort = false;
360 for(int i = 0; i < PORT_BLOCK_SIZE; i++) {
361
362 if(isPortAvailable(nextFreePort)) {
363 foundFreePort = true;
364 break;
365
366 } else {
367 incrementNextFreePort();
368 }
369 }
370
371 if(foundFreePort) {
372 // Free port number currently found becomes the port number of the socket that this
373 // DownloadJob instance will be connecting to when the user wants to prematurely stop Wget.
374 this.port = nextFreePort;
375 incrementNextFreePort();
376
377 } else {
378 throw new Exception("Cannot find an available port in the range "
379 + PORT_BASE + "-" + (PORT_BASE+PORT_BLOCK_SIZE)
380 + "\nwhich is necessary for forcibly terminating wget.");
381 }
382
383 // Communicate the chosen port for this DownloadJob instance to the perl process, so
384 // that it can set up a ServerSocket at that port to listen for any signal to terminate wget
385 OutputStream os = prcs.getOutputStream();
386 String p = ""+this.port+"\n";
387 System.err.println("Portnumber found: " + p);
388
389 os.write(p.getBytes());
390 os.close();
391
392 } catch(Exception ex) {
393 System.err.println("Sent available portnumber " + this.port + " to process' outputstream.\nBut got exception: " + ex);
394 }
395 }
396
397 BufferedReader br = new BufferedReader(new InputStreamReader(prcs.getErrorStream()));
398 // Capture the standard error stream and search for two particular occurrences.
399 String line="";
400 boolean ignore_for_robots = false;
401 int max_download = DownloadJob.UNKNOWN_MAX;
402
403 while ((line = br.readLine()) != null && !line.trim().equals("<<Finished>>") && state != STOPPED) {
404 if ( max_download == DownloadJob.UNKNOWN_MAX) {
405 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
406 max_download = DownloadJob.DEFINED_MAX;
407 }
408 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
409 max_download = DownloadJob.UNDEFINED_MAX;
410 }
411 }
412 else if(max_download == DownloadJob.UNDEFINED_MAX) {
413 DebugStream.println(line);
414 download_log.appendLine(line);
415 // The first magic special test is to see if we've just
416 // asked for the robots.txt file. If so we ignore
417 // the next add and then the next complete/error.
418 if(line.lastIndexOf("robots.txt;") != -1) {
419 DebugStream.println("***** Requesting robot.txt");
420 ignore_for_robots = true;
421 }
422 // If line contains "=> `" display text as the
423 // currently downloading url. Unique to add download.
424 else if(line.lastIndexOf("=> `") != -1) {
425 if(!ignore_for_robots) {
426 // Add download
427 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
428 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
429 }
430 }
431 // If line contains "/s) - `" set currently
432 // downloading url to "Download Complete".
433 else if(line.lastIndexOf("/s) - `") != -1) {
434 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
435 if(!ignore_for_robots) {
436 DebugStream.println("Not ignore for robots");
437 // Download complete
438 downloadComplete(current_file_downloading);
439 }
440 else {
441 DebugStream.println("Ignore for robots");
442 ignore_for_robots = false;
443 }
444 }
445 // The already there line begins "File `..." However this
446 // is only true in english, so instead I looked and there
447 // are few (if any at all) other messages than those above
448 // and not overwriting messages that use " `" so we'll
449 // look for that. Note this method is not guarenteed to be
450 // unique like the previous two.
451 else if(line.lastIndexOf(" `") != -1) {
452 // Not Overwriting
453 DebugStream.println("Already there.");
454 String new_url =
455 line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
456 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
457 downloadWarning();
458 }
459 // Any other important message starts with the time in the form hh:mm:ss
460 else if(line.length() > 7) {
461 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
462 if(!ignore_for_robots) {
463 DebugStream.println("Error.");
464 downloadFailed();
465 }
466 else {
467 ignore_for_robots = false;
468 }
469 }
470 }
471 }
472 else if (max_download == DownloadJob.DEFINED_MAX) {
473 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
474 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
475 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
476 progress.resetFileCount();
477 progress.addDownload("files"); // for display: "Downloading files"
478 }
479 else if (line.lastIndexOf("<<Done>>") != -1) {
480 progress.increaseFileCount();
481 }
482 else if(line.lastIndexOf("<<Done:") != -1) {
483 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
484 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
485 }
486
487 DebugStream.println(line);
488 download_log.appendLine(line);
489 }
490 else {
491 System.out.println("Error!!");
492 System.exit(-1);
493 }
494 }
495
496 if(state == STOPPED) {
497 boolean terminatePerlScript = true;
498
499 // When GLI is working with wget-based download modes other than OAI (MediaWiki and Web
500 // download) and the STOP button has been pressed, wget needs to be prematurely terminated.
501 // Only wget download modes Web and MediaWiki require the use of sockets to communicate
502 // with the perl script in order to get wget to terminate. Other download modes, including
503 // wgetdownload mode OAI, can terminate in the traditional manner: close process inputstream
504 // and kill perl process. OAI launches many wgets. So that when the perl process is terminated,
505 // the currently running wget will finish off but other wgets are no longer launched.
506 if(prcs != null && (mode.equals("Web") || mode.equals("MediaWiki"))) {
507
508 // create a socket to the perl child process and communicate the STOP message
509 Socket clientSocket = null;
510 if(clientSocket == null) {
511 try {
512 clientSocket = new Socket("localhost", this.port); // connect to the port chosen for this DownloadJob instance
513
514 BufferedReader clientReader = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
515 String response = clientReader.readLine(); // see if we've been connected
516 System.err.println("Communicating with perl download script on port " + this.port
517 + "\nGot response from perl: " + response);
518
519 // Send the STOP signal
520 OutputStream os = clientSocket.getOutputStream();
521 String message = "<<STOP>>\n";
522 os.write(message.getBytes());
523 response = clientReader.readLine(); // see whether the stop signal has been received
524 System.err.println("GLI sent STOP signal to perl to terminate wget."
525 + "\nGot response from perl: " + response);
526
527 response = clientReader.readLine(); // see whether the perl script is ready to be terminated
528 System.err.println("Got another response from perl: " + response);
529 os.close();
530
531 clientReader.close();
532 clientSocket.close(); // close the clientSocket (the Perl end will close the server socket that Perl opened)
533 clientReader = null;
534 clientSocket = null;
535
536 if(response == null) {
537 terminatePerlScript = false;
538 }
539 } catch(IOException ex) {
540 System.err.println("Tried to communicate through client socket - port " + this.port + ", but got exception: " + ex);
541 } catch(Exception ex) {
542 System.err.println("Tried to open client socket, but got exception: " + ex);
543 }
544 }
545 }
546
547 //prcs.getInputStream().close();
548 prcs.getErrorStream().close();
549 br.close();
550 br = null;
551 if(terminatePerlScript) {
552 prcs.destroy(); // This doesn't always work, but it's worth a try
553 prcs = null;
554 }
555
556 // Notify the DownloadScrollPane which is waiting on this job to complete that we are ready
557 synchronized(this) {
558 this.notify();
559 }
560 }
561 }
562 catch (Exception ioe) {
563 //message(Utility.ERROR, ioe.toString());
564 //JTest
565 DebugStream.printStackTrace(ioe);
566 }
567 // If we've got to here and the state isn't STOPPED then the
568 // job is complete.
569 if(state == DownloadJob.RUNNING) {
570 progress.mirrorComplete();
571 previous_state = state;
572 state = DownloadJob.COMPLETE;
573 }
574 // refresh the workspace tree
575 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
576 }
577
578 public void callDownload() {
579
580 ArrayList command_list= new ArrayList();
581
582 // the following also works for client-gli if downloading is enabled (when there's a gs2build directory inside gli)
583 command_list.add(Configuration.perl_path);
584 command_list.add("-S");
585 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
586 command_list.add("-download_mode");
587 command_list.add(mode);
588 command_list.add("-cache_dir");
589 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
590 // For the purposes of prematurely terminating wget from GLI (which creates a socket
591 // as a communication channel between GLI and Perl), it is important to tell the script
592 // that we're running as GLI. Because when running from the command prompt, it should
593 // not create this socket and do the related processing.
594 command_list.add("-gli");
595
596 ArrayList all_arg = download.getArguments(true,false);
597 for(int i = 0; i < all_arg.size(); i++) {
598 Argument argument = (Argument) all_arg.get(i);
599 if(argument.isAssigned()) {
600 command_list.add("-" + argument.getName());
601 if(argument.getType() != Argument.FLAG) {
602 command_list.add(argument.getValue());
603 }
604 }
605 }
606
607 String [] cmd = (String []) command_list.toArray(new String[0]);
608 DebugStream.println("Download job, "+command_list);
609
610 if (previous_state == DownloadJob.COMPLETE) {
611 progress.mirrorBegun(true, true);
612 }
613 else {
614 progress.mirrorBegun(false, true);
615 }
616
617 try {
618 Runtime rt = Runtime.getRuntime();
619
620 String [] env = null;
621
622 if (Utility.isWindows()) {
623 prcs = new SafeProcess(cmd);
624 }
625 else {
626 if (proxy_url != null && !proxy_url.equals("")) {
627 // Specify proxies as environment variables
628 // Need to manually specify GSDLHOME and GSDLOS also
629 env = new String[6];
630 proxy_url = proxy_url.replaceAll("http://","");
631 env[0] = "http_proxy=http://"+proxy_url;
632 env[1] = "https_proxy=http://"+proxy_url; // HTTP protocol for https:// too
633 // see also https://wiki.archlinux.org/index.php/proxy_settings
634 env[2] = "ftp_proxy=ftp://"+proxy_url;
635 env[3] = "GSDLHOME=" + Configuration.gsdl_path;
636 env[4] = "GSDLOS=" + Gatherer.client_operating_system;
637 env[5] = "WGETRC=" + LocalGreenstone.getBinOSDirectoryPath(Gatherer.client_operating_system)+"wgetrc"; // teach it where the wgetrc file lives, in gs2build/bin/<os>
638 prcs = new SafeProcess(cmd, env, null);
639 }
640 else if(Gatherer.isGsdlRemote && Gatherer.isDownloadEnabled) {
641 // Not Windows, but running client with download panel
642 // Need to manually specify GSDLHOME and GSDLOS
643 env = new String[3];
644 env[0] = "GSDLHOME=" + Configuration.gsdl_path;
645 env[1] = "GSDLOS=" + Gatherer.client_operating_system;
646 env[2] = "WGETRC=" + LocalGreenstone.getBinOSDirectoryPath(Gatherer.client_operating_system)+"wgetrc"; // teach it where the wgetrc file lives, in gs2build/bin/<os>
647 prcs = new SafeProcess(cmd, env, null);
648 }
649 else {
650 // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set
651 prcs = new SafeProcess(cmd);
652 }
653 }
654 //System.out.println(newcmd);
655 prcs.setMainHandler(this); // attach handler to clean up before and after process.destroy()
656 // for which DownloadJob implements SafeProcess.MainProcessHandler
657
658 // To be able to stop Wget, we use sockets to communicate with the perl process that launched wget
659 if (mode.equals("Web") || mode.equals("MediaWiki")) { // wget download modes other than OAI
660
661 // Need to find an available (unused) port within the range we're looking for to pass it
662 // the Perl child process, so that it may set up a listening ServerSocket at that port number
663 try {
664 boolean foundFreePort = false;
665 for(int i = 0; i < PORT_BLOCK_SIZE; i++) {
666
667 if(isPortAvailable(nextFreePort)) {
668 foundFreePort = true;
669 break;
670
671 } else {
672 incrementNextFreePort();
673 }
674 }
675
676 if(foundFreePort) {
677 // Free port number currently found becomes the port number of the socket that this
678 // DownloadJob instance will be connecting to when the user wants to prematurely stop Wget.
679 this.port = nextFreePort;
680 incrementNextFreePort(); //// Necessary?
681
682 } else {
683 throw new Exception("Cannot find an available port in the range "
684 + PORT_BASE + "-" + (PORT_BASE+PORT_BLOCK_SIZE)
685 + "\nwhich is necessary for forcibly terminating wget.");
686 }
687
688 // Communicate the chosen port for this DownloadJob instance to the perl process, so
689 // that it can set up a ServerSocket at that port to listen for any signal to terminate wget
690 //OutputStream os = prcs.getOutputStream();
691 String p = ""+this.port+"\n";
692 System.err.println("Portnumber found: " + p);
693
694 prcs.setInputString(p);
695
696 } catch(Exception ex) {
697 System.err.println("Sent available portnumber " + this.port + " to process' outputstream.\nBut got exception: " + ex);
698 }
699 }
700
701 ProcessErrHandler errHandler = new ProcessErrHandler(); // meaningful output comes from prcs stderr
702 ProcessOutHandler outHandler = new ProcessOutHandler(); // debugging output comes from prcs' stdout
703
704 int exitVal = prcs.runProcess(null, outHandler, errHandler);
705
706 // if prcs is interrupted (cancelled) during the blocking runProcess() call,
707 // as happens on state == STOPPED, then
708 // beforeWaitingForStreamsToEnd() is called before the process' worker threads come to a halt
709 // and afterStreamsEnded() is called when the process' worker threads have halted,
710 // beforeProcessDestroy() is called before the process is destroyed,
711 // and afterProcessDestroy() is called after the proc has been destroyed.
712 // If when beforeWaitingForStreamsEnd() stage the perl was still running but had been
713 // told to stop, then the beforeWaitingForStreamsEnd() method will make sure to communicate
714 // with the perl process over a socket and send it the termination message,
715 // which will also kill any runnning wget that perl launched.
716 // In that case, destroy() is actually called on the process at last.
717
718 }
719 catch (Exception ioe) {
720 SafeProcess.log(ioe);
721 DebugStream.printStackTrace(ioe);
722 }
723
724 // now the process is done, we can at last null it
725 prcs = null;
726
727 // If we've got to here and the state isn't STOPPED then the
728 // job is complete.
729 if(getState() == DownloadJob.RUNNING) {
730 progress.mirrorComplete();
731 previous_state = getState();
732 setState(DownloadJob.COMPLETE);
733 }
734
735 SafeProcess.log("@@@@ DONE callDownload()");
736
737 // refresh the workspace tree
738 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
739 }
740
741 private synchronized boolean isStopped() { return state == STOPPED; }
742
743 // called when the user cancelled the download and we're told to stop both our external perl process
744 // and the wget process that it in turn launched
745 public void stopDownload() {
746 if(prcs != null) {
747 SafeProcess.log("@@@ Going to cancel the SafeProcess...");
748
749 // Whether a process ends naturally or is prematurely ended, beforeWaitingForStreamsToEnd()
750 // will be called. We've hooked this in to calling tellPerlToTerminateWget() only if the
751 // process is still running when cancel is pressed, but not when it's naturally terminated.
752 boolean hadToSendInterrupt = prcs.cancelRunningProcess(); // returns false if it was already terminating/terminated, true if interrupt sent
753
754 } else {
755 System.err.println("@@@@ No SafeProcess to cancel");
756 }
757
758 //setState(STOPPED); // would set it to stop on cancel, even if it already naturally terminated
759
760 }
761
762//*********** START of implementing interface Safeprocess.MainProcessHandler
763 // before and after processDestroy only happen when interrupted AND terminatePerlScript=true
764 public void beforeProcessDestroy() {}
765 public void afterProcessDestroy() {}
766
767 // after blocking call on closing up streamgobbler worker threads that happens
768 // upon natural termination or interruption of process' main body/thread.
769 // if not overriding, then return the parameter forciblyTerminating as-is
770 public boolean afterStreamsEnded(boolean forciblyTerminating) { return forciblyTerminating; }
771
772 // called after the SafeProcess has fully terminated (naturally or via process.destroy())
773 // and has been cleaned up
774 public void doneCleanup(boolean wasForciblyTerminated) {
775 // let the user know they can cancel again now cleanup phase is done
776 progress.enableCancelJob(true);
777
778 if(wasForciblyTerminated) {
779 setState(STOPPED); // sets it to stop only if process truly was prematurely terminated, not merely
780 // if the cancel button was clicked when it had already naturally terminated
781
782 // If the user had pressed the Close button to terminate the running job, then
783 // we're now ready to remove the display of the until now running job
784 // from the download progress bar interface
785 // But don't bother removing the progress bar if the user had only pressed the Stop button
786 if(wasClosed()) {
787 mummy.deleteCurrentDownloadJob(this);
788 }
789 }
790 }
791
792 // before blocking call of ending streamgobbler worker threads that happens
793 // after process' main body/thread has naturally terminated or been interrupted
794 public boolean beforeWaitingForStreamsToEnd(boolean forciblyTerminating) {
795 // let the user know they can't cancel during cleanup phase
796 progress.enableCancelJob(false);
797
798 SafeProcess.log("**** in beforeWaitingForStreamsToEnd()");
799
800 // state would not be STOPPED if cancel was pressed after the process naturally terminated anyway
801 // in that case we don't need to send perl the signal to terminate WGET
802 if(!forciblyTerminating) { //if(!isStopped()) {
803 SafeProcess.log("*** Process not (yet) cancelled/state not (yet) stopped");
804 SafeProcess.log("*** But process has naturally terminated (process streams are being closed before any interruption signal can be received), so won't be destroying process even on interrupt");
805 return false; // for us to be in this method at all with forciblyTerminating being false
806 // means the process is already naturally terminating, so don't unnaturally destroy it
807 }
808
809 // else the process is still running and we've been told to stop, so tell perl to stop wget first
810 // (so that process destroy can then be called thereafter)
811 return tellPerlToTerminateWget();
812 }
813//*********** END of implementing interface SafeProcess.MainProcessHandler
814
815 public boolean tellPerlToTerminateWget() {
816 SafeProcess.log("**** in tellPerlToTerminateWget()");
817
818 boolean terminatePerlScript = true;
819
820 // When GLI is working with wget-based download modes other than OAI (MediaWiki and Web
821 // download) and the STOP button has been pressed, wget needs to be prematurely terminated.
822 // Only wget download modes Web and MediaWiki require the use of sockets to communicate
823 // with the perl script in order to get wget to terminate. Other download modes, including
824 // wgetdownload mode OAI, can terminate in the traditional manner: close process inputstream
825 // and kill perl process. OAI launches many wgets. So that when the perl process is terminated,
826 // the currently running wget will finish off but other wgets are no longer launched.
827 if((mode.equals("Web") || mode.equals("MediaWiki"))) {
828 SafeProcess.log("@@@ Socket communication to end wget");
829 // create a socket to the perl child process and communicate the STOP message
830 Socket clientSocket = null;
831 BufferedReader clientReader = null;
832 OutputStream os = null;
833
834 if(clientSocket == null) {
835 try {
836 clientSocket = new Socket("localhost", this.port); // connect to the port chosen for this DownloadJob instance
837
838 clientReader = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
839 String response = clientReader.readLine(); // see if we've been connected
840 System.err.println("Communicating with perl download script on port " + this.port
841 + "\nGot response from perl: " + response);
842
843 // Send the STOP signal
844 os = clientSocket.getOutputStream();
845 String message = "<<STOP>>\n";
846 os.write(message.getBytes());
847 response = clientReader.readLine(); // see whether the stop signal has been received
848 System.err.println("GLI sent STOP signal to perl to terminate wget."
849 + "\nGot response from perl: " + response);
850
851 response = clientReader.readLine(); // see whether the perl script is ready to be terminated
852 System.err.println("Got another response from perl: " + response);
853
854 if(response == null) { // why? Is it because the process has already terminated naturally if response is null?
855 terminatePerlScript = false;
856 }
857 } catch(IOException ex) {
858 if(ex instanceof IOException && ex.getMessage().indexOf("Connection refused") != -1) {
859 terminatePerlScript = false; // no socket listening on other end because process ended
860 System.err.println("Tried to communicate through client socket - port " + this.port + ", but the process seems to have already ended naturally");
861 } else {
862 System.err.println("Tried to communicate through client socket - port " + this.port + ", but got exception: " + ex);
863 }
864
865 } catch(Exception ex) {
866 System.err.println("Tried to open client socket, but got exception: " + ex);
867 } finally {
868 SafeProcess.closeResource(os);
869 SafeProcess.closeResource(clientReader);
870 SafeProcess.closeSocket(clientSocket); // close the clientSocket (the Perl end will close the server socket that Perl opened)
871 os = null;
872 clientReader = null;
873 clientSocket = null;
874 }
875 }
876 }
877
878 return terminatePerlScript; // if true, it will call destroy() on the SafeProcess' process
879 }
880
881
882 /** Called by the WGet native code when the current download is
883 * completed. In turn all download listeners are informed.
884 */
885 public void downloadComplete() {
886 progress.downloadComplete(); // now this is synchronized
887 }
888
889
890 public void downloadComplete(String current_file_downloading)
891 {
892 progress.downloadComplete(); // now this is synchronized
893 DebugStream.println("Download complete: " + current_file_downloading);
894 }
895
896
897 /** Called by the WGet native code when the requested download returns
898 * a status code other than 200.
899 */
900 public void downloadFailed() {
901 // TODO!!
902 //synchronized(failed_urls) {
903 //failed_urls.add(current_url); // It is the current url that failed
904 //}
905 progress.downloadFailed(); // now this is synchronized
906 //DebugStream.println("Download failed: " + current_url);
907 }
908
909 /**
910 */
911 public void downloadWarning() {
912 progress.downloadWarning(); // now this is synchronized
913 }
914
915 public AppendLineOnlyFileDocument getLogDocument() {
916 return download_log;
917 }
918
919 /**
920 * @return Returns the progress bar associated with this job.
921 */
922 public DownloadProgressBar getProgressBar() {
923 return progress;
924 }
925
926 /** Called to discover if the user wanted this thread to run or if
927 * it is paused.
928 * @return An int representing the current DownloadJob state.
929 */
930 public synchronized int getState() {
931 return state;
932 }
933
934 /** @return true if the close button of the DownloadProgressBar was pressed,
935 * false otherwise such as if the Stop button had been pressed.
936 */
937 private synchronized boolean wasClosed() {
938 return this.wasClosed;
939 }
940
941 /** Returns the current state of the stop flag for this job.
942 * @return A boolean representing whether the user has requested to
943 * stop.
944 */
945 public synchronized boolean hasSignalledStop() {
946 if(state == DownloadJob.STOPPED || state == DownloadJob.PAUSED ||
947 state == DownloadJob.COMPLETE) {
948 return true;
949 }
950 return false;
951 }
952
953 public synchronized void setState(int state) {
954 previous_state = this.state;
955 this.state = state;
956 }
957
958 private synchronized void setClosed() {
959 this.wasClosed = true;
960 }
961
962 /** A convenience call.
963 * @return A String representing the url of the initial url (root node of the mirrored tree).
964 */
965 public String toString() {
966 return download_url;
967 }
968
969 /** Called by the WGet native code to signal the current progress of
970 * downloading.
971 * @param current A long representing the number of bytes that have
972 * been downloaded since the last update.
973 * @param expected A long representing the total number of bytes
974 * expected for this download.
975 */
976 public void updateProgress(long current, long expected) {
977 progress.updateProgress(current, expected);
978 }
979
980
981 /*
982 Go through https://docs.oracle.com/javase/tutorial/essential/concurrency/atomicvars.html series of
983 Java articles on concurrency again.
984 Go through http://docs.oracle.com/javase/tutorial/uiswing/concurrency/
985
986 http://stackoverflow.com/questions/574240/is-there-an-advantage-to-use-a-synchronized-method-instead-of-a-synchronized-blo
987
988 "Not only do synchronized methods not lock the whole class, but they don't lock the whole instance either. Unsynchronized methods in the class may still proceed on the instance."
989 "Only the synchronized methods are locked. If there are fields you use within synced methods that are accessed by unsynced methods, you can run into race conditions."
990
991 "synchronizing on "this" is considered in some circles to be an anti-pattern. The unintended consequence is that outside of the class someone can lock on an object reference that is equal to "this" and prevent other threads from passing the barriers within the class potentially creating a deadlock situation. Creating a "private final Object = new Object();" variable purely for locking purposes is the often used solution. Here's another question relating directly to this issue. http://stackoverflow.com/questions/442564/avoid-synchronizedthis-in-java?lq=1"
992
993 "A private lock is a defensive mechanism, which is never a bad idea.
994
995 Also, as you alluded to, private locks can control granularity. One set of operations on an object might be totally unrelated to another but synchronized(this) will mutually exclude access to all of them."
996
997 http://stackoverflow.com/questions/8393883/is-synchronized-keyword-exception-safe
998 "In any scoped thread-safe block, the moment you get out of it, the thread-safety is gone."
999 "In case of an exception the lock will be released."
1000
1001 http://stackoverflow.com/questions/8259479/should-i-synchronize-listener-notifications-or-not
1002 "Use a CopyOnWriteArrayList for your listener arrays."
1003 "If you use the CopyOnWriteArrayList, then you don't have to synchronize when iterating."
1004 "CopyOnWriteArrayList is thread-safe, so there is no need to synchronize."
1005
1006 "Use a ConcurrentLinkedQueue<Listener> ... for this kind of problems: adding, removing and iterating simultaneously on a collection.
1007 A precision : this solution prevents a listener from being called from the very moment it is deregistered."
1008 "It means that you start iterating, an element is added, it will be called, another is removed, it won't, all this in the same iteration cycle.
1009 It's the best of both world: ensuring synchronization, while being fine grained on who gets called and who's not."
1010
1011 http://stackoverflow.com/questions/8260205/when-a-listener-is-removed-is-it-okay-that-the-event-be-called-on-that-listener
1012
1013 http://stackoverflow.com/questions/2282166/java-synchronizing-on-primitives
1014
1015 1. You can't lock on a primitive and
1016 2. Don't lock on a Long unless you're careful how you construct them. Long values created by autoboxing or Long.valueOf() in a certain range are guaranteed to be the same across the JVM which means other threads could be locking on the same exact Long object and giving you cross-talk. This can be a subtle concurrency bug (similar to locking on intern'ed strings).
1017
1018 Cross-talk:
1019 "In electronics, crosstalk is any phenomenon by which a signal transmitted on one circuit or channel of a transmission system creates an undesired effect in another circuit or channel. Crosstalk is usually caused by undesired capacitive, inductive, or conductive coupling from one circuit, part of a circuit, or channel, to another."
1020 */
1021
1022
1023 // Inner thread class that reads from process downloadfrom.pl's std output stream
1024 private class ProcessOutHandler extends SafeProcess.CustomProcessHandler {
1025
1026 public ProcessOutHandler() {
1027 super(SafeProcess.STDOUT);
1028 }
1029
1030 public void run(Closeable stream) {
1031 InputStream is = (InputStream) stream;
1032 BufferedReader eReader = null;
1033 try {
1034
1035 String message = null;
1036 eReader = new BufferedReader(new InputStreamReader(is));
1037 while(!Thread.currentThread().isInterrupted() && (message = eReader.readLine()) != null) {
1038 if(!message.equals("\n")) {
1039 System.err.println("**** Perl STDOUT: " + message);
1040 }
1041 }
1042 if(Thread.currentThread().isInterrupted()) {
1043 System.err.println("**** Perl INTERRUPTed.");
1044 } else {
1045 System.err.println("**** Perl ENDed.");
1046 }
1047
1048 } catch(Exception e) {
1049 System.err.println("Thread - caught exception: " + e);
1050 } finally {
1051 if(Thread.currentThread().isInterrupted()) {
1052 SafeProcess.log("@@@ Successfully interrupted " + Thread.currentThread().getName() + ".");
1053 }
1054 SafeProcess.closeResource(eReader);
1055 eReader = null;
1056 }
1057 }
1058 }
1059
1060
1061 private class ProcessErrHandler extends SafeProcess.CustomProcessHandler {
1062
1063 public ProcessErrHandler() {
1064 super(SafeProcess.STDERR);
1065 }
1066
1067 public void run(Closeable stream) {
1068 InputStream eis = (InputStream) stream;
1069
1070 BufferedReader br = null;
1071 try {
1072 br = new BufferedReader(new InputStreamReader(eis));
1073
1074 // Capture the standard error stream and search for two particular occurrences.
1075 String line="";
1076 boolean ignore_for_robots = false;
1077 int max_download = DownloadJob.UNKNOWN_MAX;
1078
1079 // handle to outer class objects that need synchronization (on either objects or their methods)
1080 DownloadProgressBar progress = DownloadJob.this.progress;
1081 AppendLineOnlyFileDocument download_log = DownloadJob.this.download_log;
1082
1083 while (!Thread.currentThread().isInterrupted() && (line = br.readLine()) != null
1084 && !line.trim().equals("<<Finished>>") /*&& !isStopped()*/) {
1085 if (max_download == DownloadJob.UNKNOWN_MAX) {
1086 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
1087 max_download = DownloadJob.DEFINED_MAX;
1088 }
1089 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
1090 max_download = DownloadJob.UNDEFINED_MAX;
1091 }
1092 }
1093 else if(max_download == DownloadJob.UNDEFINED_MAX) {
1094 DebugStream.println(line);
1095 download_log.appendLine(line); // now synchronized
1096 // The first magic special test is to see if we've just
1097 // asked for the robots.txt file. If so we ignore
1098 // the next add and then the next complete/error.
1099 if(line.lastIndexOf("robots.txt;") != -1) {
1100 DebugStream.println("***** Requesting robot.txt");
1101 ignore_for_robots = true;
1102 }
1103 // If line contains "=> `" display text as the
1104 // currently downloading url. Unique to add download.
1105 else if(line.lastIndexOf("=> `") != -1) {
1106 if(!ignore_for_robots) {
1107 // Add download
1108 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
1109
1110 // now synchronized
1111 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
1112 }
1113 }
1114 // If line contains "/s) - `" set currently
1115 // downloading url to "Download Complete".
1116 // Currently: on windows ` marks start quote of downloaded file, but on linux ' marks it
1117 else if(line.lastIndexOf("/s) - `") != -1 || line.lastIndexOf("/s) - '") != -1) {
1118 String startChar = (line.lastIndexOf("/s) - `") != -1) ? "`" : "'";
1119 String current_file_downloading = line.substring(line.indexOf(startChar) + 1, line.lastIndexOf("'"));
1120 if(!ignore_for_robots) {
1121 DebugStream.println("Not ignore for robots");
1122 // Download complete
1123 downloadComplete(current_file_downloading); // synchronized
1124 }
1125 else {
1126 DebugStream.println("Ignore for robots");
1127 ignore_for_robots = false;
1128 }
1129 }
1130 // The already there line begins "File `..." However this
1131 // is only true in english, so instead I looked and there
1132 // are few (if any at all) other messages than those above
1133 // and not overwriting messages that use " `" so we'll
1134 // look for that. Note this method is not guarenteed to be
1135 // unique like the previous two.
1136 else if(line.lastIndexOf(" `") != -1) {
1137 // Not Overwriting
1138 DebugStream.println("Already there.");
1139 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
1140
1141 progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1));
1142 downloadWarning();
1143 }
1144 // Any other important message starts with the time in the form hh:mm:ss
1145 else if(line.length() > 7) {
1146 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
1147 if(!ignore_for_robots) {
1148 DebugStream.println("Error.");
1149 downloadFailed();
1150 }
1151 else {
1152 ignore_for_robots = false;
1153 }
1154 }
1155 }
1156 }
1157 else if (max_download == DownloadJob.DEFINED_MAX) {
1158 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
1159 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
1160
1161 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
1162 progress.resetFileCount();
1163 progress.addDownload("files"); // for display: "Downloading files"
1164
1165 }
1166 else if (line.lastIndexOf("<<Done>>") != -1) {
1167 progress.increaseFileCount();
1168 }
1169 else if(line.lastIndexOf("<<Done:") != -1) {
1170 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
1171 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
1172 }
1173
1174 DebugStream.println(line);
1175 download_log.appendLine(line);
1176 }
1177 else {
1178 System.out.println("Error!!");
1179 System.exit(-1);
1180 }
1181 }
1182
1183 } catch (IOException ioe) {
1184 //message(Utility.ERROR, ioe.toString());
1185 //JTest
1186 DebugStream.printStackTrace(ioe);
1187
1188 } finally {
1189 if(Thread.currentThread().isInterrupted()) { // if the thread this class is running in is interrupted
1190 SafeProcess.log("@@@ Successfully interrupted " + Thread.currentThread().getName() + ".");
1191 }
1192
1193 SafeProcess.closeResource(br);
1194 br = null;
1195 }
1196
1197 }
1198 }
1199}
Note: See TracBrowser for help on using the repository browser.