/** *######################################################################### * * A component of the Gatherer application, part of the Greenstone digital * library suite from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * *

* * Author: John Thompson, Greenstone Digital Library, University of Waikato * *

* * Copyright (C) 1999 New Zealand Digital Library Project * *

* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * *

* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * *

* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *######################################################################## */ package org.greenstone.gatherer.download; import java.awt.event.*; import java.io.*; import java.net.*; import java.util.*; import javax.swing.tree.*; import javax.swing.SwingUtilities; import org.greenstone.gatherer.Configuration; import org.greenstone.gatherer.DebugStream; import org.greenstone.gatherer.Dictionary; import org.greenstone.gatherer.Gatherer; import org.greenstone.gatherer.GAuthenticator; import org.greenstone.gatherer.greenstone.LocalGreenstone; import org.greenstone.gatherer.file.WorkspaceTree; import org.greenstone.gatherer.util.AppendLineOnlyFileDocument; import org.greenstone.gatherer.util.SafeProcess; import org.greenstone.gatherer.util.Utility; import org.greenstone.gatherer.cdm.Argument; import org.greenstone.gatherer.collection.*; /** * @author John Thompson, Greenstone Digital Library, University of Waikato * @version 2.0 * When modifying this class, bear in mind concurrency issues that could arise with * SafeProcess's worker threads and where synchronization may be needed to prevent such issues. 
*/ public class DownloadJob implements ActionListener, SafeProcess.MainProcessHandler { private AppendLineOnlyFileDocument download_log; private DownloadProgressBar progress; private int previous_state; private int state; private SafeProcess prcs = null; private final String download_url; private boolean wasClosed = false; // private String current_url; // private String destination; private final String proxy_pass; private final String proxy_user; //private final Vector encountered_urls; //private Vector failed_urls; private Download download; private DownloadScrollPane mummy; private HashMap download_option; public static final int COMPLETE = 0; public static final int PAUSED = 1; public static final int RUNNING = 2; public static final int STOPPED = 3; public static final int UNKNOWN_MAX = 0; public static final int DEFINED_MAX = 1; public static final int UNDEFINED_MAX = 2; // To prematurely terminate wget, we will need to use sockets and find a free port. // We will look at a limited range of ports. This range will be reused (circular buffer) private static final int PORT_BASE = 50000; private static final int PORT_BLOCK_SIZE = 100; private static int nextFreePort = PORT_BASE; // Keep track what port numbers we have checked for availability int port; // package access. 
The socket port number this instance of DownloadJob will use // only the main thread (where DownloadJob runs) modifies port, so no synching needed private final String mode; private String proxy_url; // only the main thread (where DownloadJob runs) modifies this, so no synching needed /** */ public DownloadJob(Download download, String proxy_pass, String proxy_user, DownloadScrollPane mummy, String mode, String proxy_url) { URL url = null; int folder_hash; this.proxy_url = proxy_url; download_option = downloadToHashMap(download); if (!mode.equals("Z3950") && !mode.equals("SRW")) { Argument url_arg = (Argument)download_option.get((String)"url"); download_url = url_arg.getValue(); } else { Argument host_arg = (Argument)download_option.get((String)"host"); Argument port_arg = (Argument)download_option.get((String)"port"); download_url = host_arg.getValue() + ":" +port_arg.getValue(); } folder_hash = download_url.hashCode(); String log_filename = Utility.getLogDir(null) + "download-"+ mode + folder_hash + ".log"; File log_file = new File(log_filename); if(log_file.exists()) { log_file.delete(); } File parent_log_file = log_file.getParentFile(); parent_log_file.mkdirs(); parent_log_file = null; log_file = null; this.download_log = new AppendLineOnlyFileDocument(log_filename, false); this.proxy_pass = proxy_pass; this.proxy_user = proxy_user; this.mummy = mummy; this.mode = mode; this.download = download; progress = new DownloadProgressBar(this,download_url, true); //encountered_urls = new Vector(); //failed_urls = new Vector(); previous_state = STOPPED; state = STOPPED; } private HashMap downloadToHashMap(Download download) { HashMap download_option = new HashMap(); ArrayList arguments = download.getArguments(true, false); for(int i = 0; i < arguments.size(); i++) { Argument argument = (Argument) arguments.get(i); download_option.put(argument.getName(), argument); } return download_option; } /** Depending on which button on the progress bar was pushed, * this method 
will affect the state of the DownloadJob and perhaps make
     * calls to wget.class if necessary.
     * This variant assigns the state fields directly instead of going through
     * getState() and setState().
     * @param event The ActionEvent fired from within the DownloadProgressBar
     * which we must respond to.
     */
    public void old_actionPerformed(ActionEvent event) {
        // Stop/start button: toggles between running and stopped. Starting from a
        // paused state is logically a resume.
        if (event.getSource() == progress.stop_start_button) {
            previous_state = state;
            if (state != RUNNING) {
                state = RUNNING;
                mummy.resumeThread();
            } else {
                state = STOPPED;
            }
            return;
        }

        if (event.getSource() != progress.close_button) {
            return;
        }

        // Close button: stop the job first if it is running, then remove it.
        if (state == RUNNING) {
            previous_state = state;
            state = STOPPED; // do we need to do anything else to stop this?
        }
        mummy.deleteDownloadJob(this);
    }

    /**
     * Reacts to the buttons of this job's DownloadProgressBar.
     * The stop/start button stops a running job through stopDownload(), or otherwise
     * marks the job RUNNING and asks the scroll pane to resume its thread. The close
     * button marks the job as closed, stops it if it is running, and asks the scroll
     * pane to delete it. State is read and written through getState() and setState()
     * rather than by touching the fields directly.
     * @param event The ActionEvent fired from within the DownloadProgressBar
     * which we must respond to.
     */
    public void actionPerformed(ActionEvent event) {
        if (event.getSource() == progress.stop_start_button) {
            previous_state = getState();
            if (getState() != RUNNING) {
                setState(RUNNING);
                mummy.resumeThread();
            } else {
                // cancels any running SafeProcess, will set the current state to STOPPED when the time is right
                stopDownload();
            }
            return;
        }

        if (event.getSource() != progress.close_button) {
            return;
        }

        setClosed();
        SafeProcess.log("@@@ Progress bar close button pressed");
        if (getState() == RUNNING) {
            previous_state = getState();
            // cancels any running SafeProcess, will set the current state to STOPPED when the time is right
            stopDownload();
        }
        mummy.deleteDownloadJob(this);
    }

    /**
     * Probes a local port by trying to connect to it.
     *
     * @param portnum the port on localhost to probe
     * @return true when the connection attempt fails with a ConnectException (nothing
     *         is listening there); false when the connection succeeds or any other
     *         exception occurs
     */
    public static boolean isPortAvailable(int portnum) {
        boolean available;
        try {
            // A successful connect means something already listens on this port.
            Socket probe = new Socket("localhost", portnum);
            probe.close();
            available = false;
        } catch (ConnectException refused) {
            // Typically the connection was refused because no process is listening
            // on the address/port.
            System.err.println("Port " + portnum + " not yet in use.");
            available = true;
        } catch (Exception other) {
            // Includes BindException: the port is in use, or the requested local
            // address could not be assigned.
            available = false;
        }
        return available;
    }

    /** Advances nextFreePort by one, wrapping around inside the block of
     * PORT_BLOCK_SIZE ports that starts at PORT_BASE. */
    private void incrementNextFreePort() {
        nextFreePort = PORT_BASE + ((nextFreePort - PORT_BASE + 1) % PORT_BLOCK_SIZE);
    }

    // If eschewing the use of SafeProcess, reactivate (by renaming) old_callDownload()
    // and old_actionPerformed(), and DownloadScrollPane.java's old_deleteDownloadJob().
/**
 * Legacy download runner that launches downloadfrom.pl with Runtime.exec() instead of
 * SafeProcess. The comment preceding this method says it is kept so that it can be
 * reactivated by renaming.
 *
 * Steps, in order:
 *  - Builds the command: perl path, -S, downloadfrom.pl, -download_mode, -cache_dir, -gli,
 *    followed by every assigned argument of the download (flags without a value).
 *  - Tells the progress bar the mirror has begun (first argument true when the previous
 *    state was COMPLETE).
 *  - Starts the process. On non-Windows systems a non-empty proxy_url adds http_proxy,
 *    ftp_proxy, GSDLHOME and GSDLOS to the environment; a remote client with downloading
 *    enabled adds GSDLHOME and GSDLOS only. Note that the local variable prcs declared
 *    here is a java.lang.Process and shadows the SafeProcess field of the same name.
 *  - For the Web and MediaWiki modes, probes up to PORT_BLOCK_SIZE ports for a free one
 *    and writes its number to the process' standard input; any failure is only printed
 *    to System.err.
 *  - Reads the process' standard error line by line, updating the progress bar and the
 *    download log, until the stream ends, a terminator line is read, or state becomes
 *    STOPPED. The reader created on standard output is not read in this method.
 *  - When state is STOPPED: for Web and MediaWiki a stop message is sent over a client
 *    socket, the error stream is closed, the process is destroyed unless the last
 *    socket reply was null, and threads waiting on this object are notified.
 *
 * Calls System.exit(-1) if max_download holds none of the three *_MAX values.
 *
 * NOTE(review): several literals in this copy look truncated - the loop terminator,
 * both max-download tests and the stop message are all "<>" (so the UNDEFINED_MAX test
 * can never be reached after the identical DEFINED_MAX test), and the DEFINED_MAX branch
 * contains if (line.lastIndexOf("<")); followed by uses of total_ID and completed_amount
 * that are not declared anywhere visible. Presumably angle-bracket markers were lost;
 * confirm against version control before reactivating this method.
 */
public void old_callDownload() { ArrayList command_list = new ArrayList(); // the following also works for client-gli if downloading is enabled (when there's a gs2build directory inside gli) command_list.add(Configuration.perl_path); command_list.add("-S"); command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl"); command_list.add("-download_mode"); command_list.add(mode); command_list.add("-cache_dir"); command_list.add(Gatherer.getGLIUserCacheDirectoryPath()); // For the purposes of prematurely terminating wget from GLI (which creates a socket // as a communication channel between GLI and Perl), it is important to tell the script // that we're running as GLI. Because when running from the command prompt, it should // not create this socket and do the related processing. command_list.add("-gli"); ArrayList all_arg = download.getArguments(true,false); for(int i = 0; i < all_arg.size(); i++) { Argument argument = (Argument) all_arg.get(i); if(argument.isAssigned()) { command_list.add("-" + argument.getName()); if(argument.getType() != Argument.FLAG) { command_list.add(argument.getValue()); } } } String [] cmd = (String []) command_list.toArray(new String[0]); DebugStream.println("Download job, "+command_list); if (previous_state == DownloadJob.COMPLETE) { progress.mirrorBegun(true, true); } else { progress.mirrorBegun(false, true); } try { Runtime rt = Runtime.getRuntime(); String [] env = null; Process prcs = null; if (Utility.isWindows()) { prcs = rt.exec(cmd); } else { if (proxy_url != null && !proxy_url.equals("")) { // Specify proxies as environment variables // Need to manually specify GSDLHOME and GSDLOS also env = new String[4]; proxy_url = proxy_url.replaceAll("http://",""); env[0] = "http_proxy=http://"+proxy_url; env[1] = "ftp_proxy=ftp://"+proxy_url; env[2] = "GSDLHOME=" + Configuration.gsdl_path; env[3] = "GSDLOS=" + Gatherer.client_operating_system; prcs = rt.exec(cmd, env); } else if(Gatherer.isGsdlRemote && 
Gatherer.isDownloadEnabled) { // Not Windows, but running client with download panel // Need to manually specify GSDLHOME and GSDLOS env = new String[2]; env[0] = "GSDLHOME=" + Configuration.gsdl_path; env[1] = "GSDLOS=" + Gatherer.client_operating_system; prcs = rt.exec(cmd, env); } else { // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set prcs = rt.exec(cmd); } } //System.out.println(newcmd); // Can use the following if debugging WgetDownload.pm - Reads debug stmts from the perl process' STDIN stream //(new PerlReaderThread(prcs)).start(); InputStream is = prcs.getInputStream(); BufferedReader reader = new BufferedReader(new InputStreamReader(is)); // To be able to stop Wget, we use sockets to communicate with the perl process that launched wget if (mode.equals("Web") || mode.equals("MediaWiki")) { // wget download modes other than OAI // Need to find an available (unused) port within the range we're looking for to pass it // the Perl child process, so that it may set up a listening ServerSocket at that port number try { boolean foundFreePort = false; for(int i = 0; i < PORT_BLOCK_SIZE; i++) { if(isPortAvailable(nextFreePort)) { foundFreePort = true; break; } else { incrementNextFreePort(); } } if(foundFreePort) { // Free port number currently found becomes the port number of the socket that this // DownloadJob instance will be connecting to when the user wants to prematurely stop Wget. 
// Record the free port for this job and advance the shared nextFreePort index. If no port
// was free, the exception thrown below is caught a few statements later and only printed.
this.port = nextFreePort; incrementNextFreePort(); } else { throw new Exception("Cannot find an available port in the range " + PORT_BASE + "-" + (PORT_BASE+PORT_BLOCK_SIZE) + "\nwhich is necessary for forcibly terminating wget."); } // Communicate the chosen port for this DownloadJob instance to the perl process, so // that it can set up a ServerSocket at that port to listen for any signal to terminate wget OutputStream os = prcs.getOutputStream(); String p = ""+this.port+"\n"; System.err.println("Portnumber found: " + p); os.write(p.getBytes()); os.close(); } catch(Exception ex) { System.err.println("Sent available portnumber " + this.port + " to process' outputstream.\nBut got exception: " + ex); } } BufferedReader br = new BufferedReader(new InputStreamReader(prcs.getErrorStream())); // Capture the standard error stream and search for two particular occurrences. String line=""; boolean ignore_for_robots = false; int max_download = DownloadJob.UNKNOWN_MAX; while ((line = br.readLine()) != null && !line.trim().equals("<>") && state != STOPPED) { if ( max_download == DownloadJob.UNKNOWN_MAX) { if(line.lastIndexOf("<>") != -1) { max_download = DownloadJob.DEFINED_MAX; } else if (line.lastIndexOf("<>") != -1) { max_download = DownloadJob.UNDEFINED_MAX; } } else if(max_download == DownloadJob.UNDEFINED_MAX) { DebugStream.println(line); download_log.appendLine(line); // The first magic special test is to see if we've just // asked for the robots.txt file. If so we ignore // the next add and then the next complete/error. if(line.lastIndexOf("robots.txt;") != -1) { DebugStream.println("***** Requesting robot.txt"); ignore_for_robots = true; } // If line contains "=> `" display text as the // currently downloading url. Unique to add download. 
// The branch below counts a new file through progress.addDownload("file") unless the
// robots.txt flag is set; new_url is extracted but only used by commented-out code.
else if(line.lastIndexOf("=> `") != -1) { if(!ignore_for_robots) { // Add download String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'")); progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1)); } } // If line contains "/s) - `" set currently // downloading url to "Download Complete". else if(line.lastIndexOf("/s) - `") != -1) { String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'")); if(!ignore_for_robots) { DebugStream.println("Not ignore for robots"); // Download complete downloadComplete(current_file_downloading); } else { DebugStream.println("Ignore for robots"); ignore_for_robots = false; } } // The already there line begins "File `..." However this // is only true in english, so instead I looked and there // are few (if any at all) other messages than those above // and not overwriting messages that use " `" so we'll // look for that. Note this method is not guarenteed to be // unique like the previous two. 
// The branch below handles any remaining line containing " `" as a file that is already
// present: it is counted and downloadWarning() is called.
else if(line.lastIndexOf(" `") != -1) { // Not Overwriting DebugStream.println("Already there."); String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'")); progress.addDownload("file"); //addDownload("http:/" + new_url.substring(cachedir_prefix_length()-1)); downloadWarning(); } // Any other important message starts with the time in the form hh:mm:ss else if(line.length() > 7) { if(line.charAt(2) == ':' && line.charAt(5) == ':') { if(!ignore_for_robots) { DebugStream.println("Error."); downloadFailed(); } else { ignore_for_robots = false; } } } } else if (max_download == DownloadJob.DEFINED_MAX) { if (line.lastIndexOf("<")); progress.setTotalDownload((Integer.valueOf(total_ID)).intValue()); progress.resetFileCount(); progress.addDownload("files"); // for display: "Downloading files" } else if (line.lastIndexOf("<>") != -1) { progress.increaseFileCount(); } else if(line.lastIndexOf("<")); progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue()); } DebugStream.println(line); download_log.appendLine(line); } else { System.out.println("Error!!"); System.exit(-1); } } if(state == STOPPED) { boolean terminatePerlScript = true; // When GLI is working with wget-based download modes other than OAI (MediaWiki and Web // download) and the STOP button has been pressed, wget needs to be prematurely terminated. // Only wget download modes Web and MediaWiki require the use of sockets to communicate // with the perl script in order to get wget to terminate. Other download modes, including // wgetdownload mode OAI, can terminate in the traditional manner: close process inputstream // and kill perl process. OAI launches many wgets. So that when the perl process is terminated, // the currently running wget will finish off but other wgets are no longer launched. 
// Stop handshake: connect to this job's port, read one line, send the stop message, then
// read two replies. A null final reply clears terminatePerlScript, so destroy() is skipped.
if(prcs != null && (mode.equals("Web") || mode.equals("MediaWiki"))) { // create a socket to the perl child process and communicate the STOP message Socket clientSocket = null; if(clientSocket == null) { try { clientSocket = new Socket("localhost", this.port); // connect to the port chosen for this DownloadJob instance BufferedReader clientReader = new BufferedReader(new InputStreamReader(clientSocket.getInputStream())); String response = clientReader.readLine(); // see if we've been connected System.err.println("Communicating with perl download script on port " + this.port + "\nGot response from perl: " + response); // Send the STOP signal OutputStream os = clientSocket.getOutputStream(); String message = "<>\n"; os.write(message.getBytes()); response = clientReader.readLine(); // see whether the stop signal has been received System.err.println("GLI sent STOP signal to perl to terminate wget." + "\nGot response from perl: " + response); response = clientReader.readLine(); // see whether the perl script is ready to be terminated System.err.println("Got another response from perl: " + response); os.close(); clientReader.close(); clientSocket.close(); // close the clientSocket (the Perl end will close the server socket that Perl opened) clientReader = null; clientSocket = null; if(response == null) { terminatePerlScript = false; } } catch(IOException ex) { System.err.println("Tried to communicate through client socket - port " + this.port + ", but got exception: " + ex); } catch(Exception ex) { System.err.println("Tried to open client socket, but got exception: " + ex); } } } //prcs.getInputStream().close(); prcs.getErrorStream().close(); br.close(); br = null; if(terminatePerlScript) { prcs.destroy(); // This doesn't always work, but it's worth a try prcs = null; } // Notify the DownloadScrollPane which is waiting on this job to complete that we are ready synchronized(this) { this.notify(); } } } catch (Exception ioe) { //message(Utility.ERROR, ioe.toString()); 
//JTest DebugStream.printStackTrace(ioe); } // If we've got to here and the state isn't STOPPED then the // job is complete. if(state == DownloadJob.RUNNING) { progress.mirrorComplete(); previous_state = state; state = DownloadJob.COMPLETE; } // refresh the workspace tree Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED); } public void callDownload() { ArrayList command_list= new ArrayList(); // the following also works for client-gli if downloading is enabled (when there's a gs2build directory inside gli) command_list.add(Configuration.perl_path); command_list.add("-S"); command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl"); command_list.add("-download_mode"); command_list.add(mode); command_list.add("-cache_dir"); command_list.add(Gatherer.getGLIUserCacheDirectoryPath()); // For the purposes of prematurely terminating wget from GLI (which creates a socket // as a communication channel between GLI and Perl), it is important to tell the script // that we're running as GLI. Because when running from the command prompt, it should // not create this socket and do the related processing. 
command_list.add("-gli"); ArrayList all_arg = download.getArguments(true,false); for(int i = 0; i < all_arg.size(); i++) { Argument argument = (Argument) all_arg.get(i); if(argument.isAssigned()) { command_list.add("-" + argument.getName()); if(argument.getType() != Argument.FLAG) { command_list.add(argument.getValue()); } } } String [] cmd = (String []) command_list.toArray(new String[0]); DebugStream.println("Download job, "+command_list); if (previous_state == DownloadJob.COMPLETE) { progress.mirrorBegun(true, true); } else { progress.mirrorBegun(false, true); } try { Runtime rt = Runtime.getRuntime(); String [] env = null; if (Utility.isWindows()) { prcs = new SafeProcess(cmd); } else { if (proxy_url != null && !proxy_url.equals("")) { // Specify proxies as environment variables // Need to manually specify GSDLHOME and GSDLOS also env = new String[4]; proxy_url = proxy_url.replaceAll("http://",""); env[0] = "http_proxy=http://"+proxy_url; env[1] = "ftp_proxy=ftp://"+proxy_url; env[2] = "GSDLHOME=" + Configuration.gsdl_path; env[3] = "GSDLOS=" + Gatherer.client_operating_system; prcs = new SafeProcess(cmd, env, null); } else if(Gatherer.isGsdlRemote && Gatherer.isDownloadEnabled) { // Not Windows, but running client with download panel // Need to manually specify GSDLHOME and GSDLOS env = new String[2]; env[0] = "GSDLHOME=" + Configuration.gsdl_path; env[1] = "GSDLOS=" + Gatherer.client_operating_system; prcs = new SafeProcess(cmd, env, null); } else { // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set prcs = new SafeProcess(cmd); } } //System.out.println(newcmd); prcs.setMainHandler(this); // attach handler to clean up before and after process.destroy() // for which DownloadJob implements SafeProcess.MainProcessHandler // To be able to stop Wget, we use sockets to communicate with the perl process that launched wget if (mode.equals("Web") || mode.equals("MediaWiki")) { // wget download modes other than OAI // Need to find an available (unused) 
port within the range we're looking for to pass it // the Perl child process, so that it may set up a listening ServerSocket at that port number try { boolean foundFreePort = false; for(int i = 0; i < PORT_BLOCK_SIZE; i++) { if(isPortAvailable(nextFreePort)) { foundFreePort = true; break; } else { incrementNextFreePort(); } } if(foundFreePort) { // Free port number currently found becomes the port number of the socket that this // DownloadJob instance will be connecting to when the user wants to prematurely stop Wget. this.port = nextFreePort; incrementNextFreePort(); //// Necessary? } else { throw new Exception("Cannot find an available port in the range " + PORT_BASE + "-" + (PORT_BASE+PORT_BLOCK_SIZE) + "\nwhich is necessary for forcibly terminating wget."); } // Communicate the chosen port for this DownloadJob instance to the perl process, so // that it can set up a ServerSocket at that port to listen for any signal to terminate wget //OutputStream os = prcs.getOutputStream(); String p = ""+this.port+"\n"; System.err.println("Portnumber found: " + p); prcs.setInputString(p); } catch(Exception ex) { System.err.println("Sent available portnumber " + this.port + " to process' outputstream.\nBut got exception: " + ex); } } ProcessErrHandler errHandler = new ProcessErrHandler(); // meaningful output comes from prcs stderr ProcessOutHandler outHandler = new ProcessOutHandler(); // debugging output comes from prcs' stdout int exitVal = prcs.runProcess(null, outHandler, errHandler); // if prcs is interrupted (cancelled) during the blocking runProcess() call, // as happens on state == STOPPED, then // beforeWaitingForStreamsToEnd() is called before the process' worker threads come to a halt // and afterStreamsEnded() is called when the process' worker threads have halted, // beforeProcessDestroy() is called before the process is destroyed, // and afterProcessDestroy() is called after the proc has been destroyed. 
// If when beforeWaitingForStreamsEnd() stage the perl was still running but had been // told to stop, then the beforeWaitingForStreamsEnd() method will make sure to communicate // with the perl process over a socket and send it the termination message, // which will also kill any runnning wget that perl launched. // In that case, destroy() is actually called on the process at last. } catch (Exception ioe) { SafeProcess.log(ioe); DebugStream.printStackTrace(ioe); } // now the process is done, we can at last null it prcs = null; // If we've got to here and the state isn't STOPPED then the // job is complete. if(getState() == DownloadJob.RUNNING) { progress.mirrorComplete(); previous_state = getState(); setState(DownloadJob.COMPLETE); } SafeProcess.log("@@@@ DONE callDownload()"); // refresh the workspace tree Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED); } private synchronized boolean isStopped() { return state == STOPPED; } // called when the user cancelled the download and we're told to stop both our external perl process // and the wget process that it in turn launched public void stopDownload() { if(prcs != null) { SafeProcess.log("@@@ Going to cancel the SafeProcess..."); // Whether a process ends naturally or is prematurely ended, beforeWaitingForStreamsToEnd() // will be called. We've hooked this in to calling tellPerlToTerminateWget() only if the // process is still running when cancel is pressed, but not when it's naturally terminated. 
boolean hadToSendInterrupt = prcs.cancelRunningProcess(); // returns false if it was already terminating/terminated, true if interrupt sent } else { System.err.println("@@@@ No SafeProcess to cancel"); } //setState(STOPPED); // would set it to stop on cancel, even if it already naturally terminated } //*********** START of implementing interface Safeprocess.MainProcessHandler // before and after processDestroy only happen when interrupted AND terminatePerlScript=true public void beforeProcessDestroy() {} public void afterProcessDestroy() {} // after blocking call on closing up streamgobbler worker threads that happens // upon natural termination or interruption of process' main body/thread. // if not overriding, then return the parameter forciblyTerminating as-is public boolean afterStreamsEnded(boolean forciblyTerminating) { return forciblyTerminating; } // called after the SafeProcess has fully terminated (naturally or via process.destroy()) // and has been cleaned up public void doneCleanup(boolean wasForciblyTerminated) { // let the user know they can cancel again now cleanup phase is done progress.enableCancelJob(true); if(wasForciblyTerminated) { setState(STOPPED); // sets it to stop only if process truly was prematurely terminated, not merely // if the cancel button was clicked when it had already naturally terminated // If the user had pressed the Close button to terminate the running job, then // we're now ready to remove the display of the until now running job // from the download progress bar interface // But don't bother removing the progress bar if the user had only pressed the Stop button if(wasClosed()) { mummy.deleteCurrentDownloadJob(this); } } } // before blocking call of ending streamgobbler worker threads that happens // after process' main body/thread has naturally terminated or been interrupted public boolean beforeWaitingForStreamsToEnd(boolean forciblyTerminating) { // let the user know they can't cancel during cleanup phase 
progress.enableCancelJob(false); SafeProcess.log("**** in beforeWaitingForStreamsToEnd()"); // state would not be STOPPED if cancel was pressed after the process naturally terminated anyway // in that case we don't need to send perl the signal to terminate WGET if(!forciblyTerminating) { //if(!isStopped()) { SafeProcess.log("*** Process not (yet) cancelled/state not (yet) stopped"); SafeProcess.log("*** But process has naturally terminated (process streams are being closed before any interruption signal can be received), so won't be destroying process even on interrupt"); return false; // for us to be in this method at all with forciblyTerminating being false // means the process is already naturally terminating, so don't unnaturally destroy it } // else the process is still running and we've been told to stop, so tell perl to stop wget first // (so that process destroy can then be called thereafter) return tellPerlToTerminateWget(); } //*********** END of implementing interface Safeprocess.MainProcessHandler public boolean tellPerlToTerminateWget() { SafeProcess.log("**** in tellPerlToTerminateWget()"); boolean terminatePerlScript = true; // When GLI is working with wget-based download modes other than OAI (MediaWiki and Web // download) and the STOP button has been pressed, wget needs to be prematurely terminated. // Only wget download modes Web and MediaWiki require the use of sockets to communicate // with the perl script in order to get wget to terminate. Other download modes, including // wgetdownload mode OAI, can terminate in the traditional manner: close process inputstream // and kill perl process. OAI launches many wgets. So that when the perl process is terminated, // the currently running wget will finish off but other wgets are no longer launched. 
if((mode.equals("Web") || mode.equals("MediaWiki"))) { SafeProcess.log("@@@ Socket communication to end wget"); // create a socket to the perl child process and communicate the STOP message Socket clientSocket = null; BufferedReader clientReader = null; OutputStream os = null; if(clientSocket == null) { try { clientSocket = new Socket("localhost", this.port); // connect to the port chosen for this DownloadJob instance clientReader = new BufferedReader(new InputStreamReader(clientSocket.getInputStream())); String response = clientReader.readLine(); // see if we've been connected System.err.println("Communicating with perl download script on port " + this.port + "\nGot response from perl: " + response); // Send the STOP signal os = clientSocket.getOutputStream(); String message = "<>\n"; os.write(message.getBytes()); response = clientReader.readLine(); // see whether the stop signal has been received System.err.println("GLI sent STOP signal to perl to terminate wget." + "\nGot response from perl: " + response); response = clientReader.readLine(); // see whether the perl script is ready to be terminated System.err.println("Got another response from perl: " + response); if(response == null) { // why? Is it because the process has already terminated naturally if response is null? 
terminatePerlScript = false; } } catch(IOException ex) { if(ex instanceof IOException && ex.getMessage().indexOf("Connection refused") != -1) { terminatePerlScript = false; // no socket listening on other end because process ended System.err.println("Tried to communicate through client socket - port " + this.port + ", but the process seems to have already ended naturally"); } else { System.err.println("Tried to communicate through client socket - port " + this.port + ", but got exception: " + ex); } } catch(Exception ex) { System.err.println("Tried to open client socket, but got exception: " + ex); } finally { SafeProcess.closeResource(os); SafeProcess.closeResource(clientReader); SafeProcess.closeSocket(clientSocket); // close the clientSocket (the Perl end will close the server socket that Perl opened) os = null; clientReader = null; clientSocket = null; } } } return terminatePerlScript; // if true, it will call destroy() on the SafeProcess' process } /** Called by the WGet native code when the current download is * completed. In turn all download listeners are informed. */ public void downloadComplete() { progress.downloadComplete(); // now this is synchronized } public void downloadComplete(String current_file_downloading) { progress.downloadComplete(); // now this is synchronized DebugStream.println("Download complete: " + current_file_downloading); } /** Called by the WGet native code when the requested download returns * a status code other than 200. */ public void downloadFailed() { // TODO!! //synchronized(failed_urls) { //failed_urls.add(current_url); // It is the current url that failed //} progress.downloadFailed(); // now this is synchronized //DebugStream.println("Download failed: " + current_url); } /** */ public void downloadWarning() { progress.downloadWarning(); // now this is synchronized } public AppendLineOnlyFileDocument getLogDocument() { return download_log; } /** * @return Returns the progress bar associated with this job. 
*/
    public DownloadProgressBar getProgressBar() {
        return progress;
    }

    /**
     * Reports the state this job is currently in, e.g. so callers can discover whether the
     * user wanted this thread to run or whether it is paused.
     * @return An int representing the current DownloadJob state.
     */
    public synchronized int getState() {
        return state;
    }

    /**
     * @return true if the close button of the DownloadProgressBar was pressed,
     * false otherwise such as if the Stop button had been pressed.
     */
    private synchronized boolean wasClosed() {
        return this.wasClosed;
    }

    /**
     * Returns the current state of the stop flag for this job.
     * @return true when the state is STOPPED, PAUSED or COMPLETE, false for any other state.
     */
    public synchronized boolean hasSignalledStop() {
        return state == DownloadJob.STOPPED
            || state == DownloadJob.PAUSED
            || state == DownloadJob.COMPLETE;
    }

    /**
     * Moves this job into a new state, remembering the state it was in before.
     * @param state the new DownloadJob state
     */
    public synchronized void setState(int state) {
        previous_state = this.state;
        this.state = state;
    }

    /** Records that this job was closed. */
    private synchronized void setClosed() {
        this.wasClosed = true;
    }

    /**
     * A convenience call.
     * @return A String representing the url of the initial url (root node of the mirrored tree).
     */
    public String toString() {
        return download_url;
    }

    /**
     * Called by the WGet native code to signal the current progress of downloading.
     * The values are handed straight on to the progress bar.
     * @param current A long representing the number of bytes that have
     * been downloaded since the last update.
     * @param expected A long representing the total number of bytes
     * expected for this download.
     */
    public void updateProgress(long current, long expected) {
        progress.updateProgress(current, expected);
    }

    /*
      Go through https://docs.oracle.com/javase/tutorial/essential/concurrency/atomicvars.html series of
      Java articles on concurrency again.
      Go through http://docs.oracle.com/javase/tutorial/uiswing/concurrency/

      http://stackoverflow.com/questions/574240/is-there-an-advantage-to-use-a-synchronized-method-instead-of-a-synchronized-blo

      "Not only do synchronized methods not lock the whole class, but they don't lock the whole instance either.
Unsynchronized methods in the class may still proceed on the instance."
"Only the synchronized methods are locked. If there are fields you use within synced methods that are accessed by unsynced methods, you can run into race conditions."

"synchronizing on "this" is considered in some circles to be an anti-pattern. The unintended consequence is that outside of the class someone can lock on an object reference that is equal to "this" and prevent other threads from passing the barriers within the class potentially creating a deadlock situation. Creating a "private final Object lock = new Object();" variable purely for locking purposes is the often used solution. Here's another question relating directly to this issue. http://stackoverflow.com/questions/442564/avoid-synchronizedthis-in-java?lq=1"

"A private lock is a defensive mechanism, which is never a bad idea.
Also, as you alluded to, private locks can control granularity. One set of operations on an object might be totally unrelated to another but synchronized(this) will mutually exclude access to all of them."

http://stackoverflow.com/questions/8393883/is-synchronized-keyword-exception-safe
"In any scoped thread-safe block, the moment you get out of it, the thread-safety is gone."
"In case of an exception the lock will be released."

http://stackoverflow.com/questions/8259479/should-i-synchronize-listener-notifications-or-not
"Use a CopyOnWriteArrayList for your listener arrays."
"If you use the CopyOnWriteArrayList, then you don't have to synchronize when iterating."
"CopyOnWriteArrayList is thread-safe, so there is no need to synchronize."

"Use a ConcurrentLinkedQueue ... for this kind of problem: adding, removing and iterating simultaneously on a collection.
A precision : this solution prevents a listener from being called from the very moment it is deregistered."
"It means that you start iterating, an element is added, it will be called, another is removed, it won't, all this in the same iteration cycle.
It's the best of both worlds: ensuring synchronization, while being fine grained on who gets called and who's not."

http://stackoverflow.com/questions/8260205/when-a-listener-is-removed-is-it-okay-that-the-event-be-called-on-that-listener

http://stackoverflow.com/questions/2282166/java-synchronizing-on-primitives

1. You can't lock on a primitive and
2. Don't lock on a Long unless you're careful how you construct them. Long values created by autoboxing or Long.valueOf() in a certain range are guaranteed to be the same across the JVM which means other threads could be locking on the same exact Long object and giving you cross-talk. This can be a subtle concurrency bug (similar to locking on intern'ed strings).

Cross-talk:
"In electronics, crosstalk is any phenomenon by which a signal transmitted on one circuit or channel of a transmission system creates an undesired effect in another circuit or channel. Crosstalk is usually caused by undesired capacitive, inductive, or conductive coupling from one circuit, part of a circuit, or channel, to another."
*/

    /**
     * Inner thread class that reads from process downloadfrom.pl's std output stream.
     * Every non-empty line read is echoed to System.err with a "**** Perl STDOUT: " prefix.
     */
    private class ProcessOutHandler extends SafeProcess.CustomProcessHandler {

        public ProcessOutHandler() {
            super(SafeProcess.STDOUT);
        }

        /**
         * Reads the stream line by line until it ends or the current thread is interrupted.
         * @param stream the stream to read; cast to InputStream
         */
        public void run(Closeable stream) {
            InputStream is = (InputStream) stream;
            BufferedReader eReader = null;
            try {
                String message = null;
                eReader = new BufferedReader(new InputStreamReader(is));
                while(!Thread.currentThread().isInterrupted() && (message = eReader.readLine()) != null) {
                    // readLine() strips the line terminator, so a blank line arrives as ""
                    // and never as "\n"; test the length to skip blank lines.
                    if(message.length() > 0) {
                        System.err.println("**** Perl STDOUT: " + message);
                    }
                }
                if(Thread.currentThread().isInterrupted()) {
                    System.err.println("**** Perl INTERRUPTed.");
                } else {
                    System.err.println("**** Perl ENDed.");
                }
            } catch(Exception e) {
                System.err.println("Thread - caught exception: " + e);
            } finally {
                if(Thread.currentThread().isInterrupted()) {
                    SafeProcess.log("@@@ Successfully interrupted " + Thread.currentThread().getName() + ".");
                }
                SafeProcess.closeResource(eReader);
                eReader = null;
            }
        }
    }

    /**
     * Inner thread class that reads the std error stream of the download process and turns
     * the lines it recognises into updates of the progress bar and the download log.
     *
     * NOTE(review): in this copy of the file the marker literals were reduced to "<>" and "<",
     * and the declarations of total_ID and completed_amount were missing, consistent with text
     * between angle brackets having been stripped. The markers below ("<<Finished>>",
     * "<<Defined Maximum>>", "<<Undefined Maximum>>", "<<Total number of record(s):",
     * "<<Done>>", "<<Done:") are a reconstruction - confirm them against the perl download
     * scripts before relying on them.
     */
    private class ProcessErrHandler extends SafeProcess.CustomProcessHandler {

        public ProcessErrHandler() {
            super(SafeProcess.STDERR);
        }

        /**
         * Returns the text between the first backtick and the last single quote of the line.
         * If there is no closing quote after the backtick, the rest of the line after the
         * backtick is returned instead of throwing StringIndexOutOfBoundsException.
         */
        private String quotedPart(String line) {
            int from = line.indexOf("`") + 1;
            int to = line.lastIndexOf("'");
            return (to >= from) ? line.substring(from, to) : line.substring(from);
        }

        /**
         * Parses the integer found between the first ':' and the first '>' of the line.
         * @return the parsed value, or -1 if the delimiters are missing or the text
         * between them is not a number
         */
        private int numberAfterColon(String line) {
            int from = line.indexOf(":") + 1;
            int to = line.indexOf(">");
            if(to < from) {
                return -1;
            }
            try {
                return Integer.parseInt(line.substring(from, to).trim());
            } catch(NumberFormatException nfe) {
                return -1;
            }
        }

        /**
         * Reads the stream line by line until it ends, the finished marker is read, or the
         * current thread is interrupted.
         * @param stream the stream to read; cast to InputStream
         */
        public void run(Closeable stream) {
            InputStream eis = (InputStream) stream;
            BufferedReader br = null;
            try {
                br = new BufferedReader(new InputStreamReader(eis));

                // Capture the standard error stream and search for two particular occurrences.
                String line = "";
                boolean ignore_for_robots = false;
                int max_download = DownloadJob.UNKNOWN_MAX;

                // handle to outer class objects that need synchronization (on either objects or their methods)
                DownloadProgressBar progress = DownloadJob.this.progress;
                AppendLineOnlyFileDocument download_log = DownloadJob.this.download_log;

                while (!Thread.currentThread().isInterrupted() && (line = br.readLine()) != null
                       && !line.trim().equals("<<Finished>>")) {

                    if (max_download == DownloadJob.UNKNOWN_MAX) {
                        // Not decided yet which kind of output to expect: wait for the marker
                        if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
                            max_download = DownloadJob.DEFINED_MAX;
                        }
                        else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
                            max_download = DownloadJob.UNDEFINED_MAX;
                        }
                    }
                    else if(max_download == DownloadJob.UNDEFINED_MAX) {
                        DebugStream.println(line);
                        download_log.appendLine(line); // now synchronized

                        // The first magic special test is to see if we've just
                        // asked for the robots.txt file. If so we ignore
                        // the next add and then the next complete/error.
                        if(line.lastIndexOf("robots.txt;") != -1) {
                            DebugStream.println("***** Requesting robot.txt");
                            ignore_for_robots = true;
                        }
                        // If line contains "=> `" display text as the
                        // currently downloading url. Unique to add download.
                        else if(line.lastIndexOf("=> `") != -1) {
                            if(!ignore_for_robots) {
                                // Add download (the url itself is not displayed)
                                progress.addDownload("file"); // now synchronized
                            }
                        }
                        // If line contains "/s) - `" set currently
                        // downloading url to "Download Complete".
                        else if(line.lastIndexOf("/s) - `") != -1) {
                            if(!ignore_for_robots) {
                                DebugStream.println("Not ignore for robots");
                                // Download complete
                                downloadComplete(quotedPart(line)); // synchronized
                            }
                            else {
                                DebugStream.println("Ignore for robots");
                                ignore_for_robots = false;
                            }
                        }
                        // The already there line begins "File `..." However this
                        // is only true in english, so instead I looked and there
                        // are few (if any at all) other messages than those above
                        // and not overwriting messages that use " `" so we'll
                        // look for that. Note this method is not guaranteed to be
                        // unique like the previous two.
                        else if(line.lastIndexOf(" `") != -1) {
                            // Not Overwriting
                            DebugStream.println("Already there.");
                            progress.addDownload("file");
                            downloadWarning();
                        }
                        // Any other important message starts with the time in the form hh:mm:ss
                        else if(line.length() > 7) {
                            if(line.charAt(2) == ':' && line.charAt(5) == ':') {
                                if(!ignore_for_robots) {
                                    DebugStream.println("Error.");
                                    downloadFailed();
                                }
                                else {
                                    ignore_for_robots = false;
                                }
                            }
                        }
                    }
                    else if (max_download == DownloadJob.DEFINED_MAX) {
                        if (line.lastIndexOf("<<Total number of record(s):") != -1) {
                            int total = numberAfterColon(line);
                            if(total >= 0) {
                                progress.setTotalDownload(total);
                            } else {
                                DebugStream.println("Could not parse total from: " + line);
                            }
                            progress.resetFileCount();
                            progress.addDownload("files"); // for display: "Downloading files"
                        }
                        else if (line.lastIndexOf("<<Done>>") != -1) {
                            progress.increaseFileCount();
                        }
                        else if(line.lastIndexOf("<<Done:") != -1) {
                            int completed = numberAfterColon(line);
                            if(completed >= 0) {
                                progress.increaseFileCount(completed);
                            } else {
                                DebugStream.println("Could not parse completed amount from: " + line);
                            }
                        }

                        DebugStream.println(line);
                        download_log.appendLine(line);
                    }
                    else {
                        // max_download is only ever assigned the three constants tested above
                        System.out.println("Error!!");
                        System.exit(-1);
                    }
                }
            } catch (IOException ioe) {
                //message(Utility.ERROR, ioe.toString());
                //JTest
                DebugStream.printStackTrace(ioe);
            } finally {
                if(Thread.currentThread().isInterrupted()) { // if the thread this class is running in is interrupted
                    SafeProcess.log("@@@ Successfully interrupted " + Thread.currentThread().getName() + ".");
                }
                SafeProcess.closeResource(br);
                br = null;
            }
        }
    }
}