/**#########################################################################
 *
 * A component of the Gatherer application, part of the Greenstone digital
 * library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * Author: John Thompson, Greenstone Digital Library, University of Waikato
 *
 * Copyright (C) 1999 New Zealand Digital Library Project
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *#########################################################################
 */
package org.greenstone.gatherer.collection;

import java.awt.event.*;
import java.io.*;
import java.net.*;
import java.util.*;
import javax.swing.tree.*;
import org.greenstone.gatherer.Configuration;
import org.greenstone.gatherer.DebugStream;
import org.greenstone.gatherer.Dictionary;
import org.greenstone.gatherer.Gatherer;
import org.greenstone.gatherer.WGet;
import org.greenstone.gatherer.file.FileNode;
import org.greenstone.gatherer.file.FileSystemModel;
import org.greenstone.gatherer.file.WorkspaceTreeModel;
import org.greenstone.gatherer.gui.DownloadProgressBar;
import org.greenstone.gatherer.gui.tree.WorkspaceTree;
import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
import org.greenstone.gatherer.util.GURL;
import org.greenstone.gatherer.util.SynchronizedTreeModelTools;
import org.greenstone.gatherer.util.Utility;

/**
 * A single wget-based mirroring job. The job owns its own log document and
 * progress bar, parses wget's stderr output to drive progress updates, and
 * tracks a simple four-value state machine (STOPPED / RUNNING / PAUSED /
 * COMPLETE) that both the GUI (via {@link #actionPerformed}) and the WGet
 * wrapper thread consult.
 *
 * @author John Thompson, Greenstone Digital Library, University of Waikato
 * @version 2.0
 */
public class DownloadJob implements ActionListener {

    /** Whether to pass the debug flag through to the native wget call. */
    private boolean debug;
    // NOTE(review): higher_directories is never read or written in this file —
    // presumably a vestigial option; confirm before removing the field.
    private boolean higher_directories;
    /** If true, wget is told not to ascend to parent directories (-np). */
    private boolean no_parents;
    /** If true, wget may span to other hosts (-H). */
    private boolean other_hosts;
    /** If true, wget downloads page requisites such as images (-p). */
    private boolean page_requisites;
    /** If true, the native wget call is made quiet (-q). */
    private boolean quiet;
    /** Per-job wget output log, written as lines arrive from wget's stderr. */
    private AppendLineOnlyFileDocument download_log;
    /** The progress bar widget representing this job in the GUI. */
    private DownloadProgressBar progress;
    /** The root url this job was asked to mirror. */
    private GURL initial = null;
    /** The url currently being downloaded, if any. */
    private GURL url = null;
    // private TreeModel model;
    /** Recursion depth: negative = infinite, 0 = page only, >0 = that depth. */
    private int depth;
    /** The state this job was in before the most recent transition. */
    private int previous_state;
    /** The current job state: one of COMPLETE, PAUSED, RUNNING, STOPPED. */
    private int state;
    /** The raw url string currently being downloaded. */
    private String current_url;
    /** Directory the mirrored files are written into. */
    private String destination;
    /** Proxy password, if proxy authentication is in use. */
    private String proxy_pass;
    /** Proxy user name, if proxy authentication is in use. */
    private String proxy_user;
    /** Every url this job has seen so far (downloads and parsed links). */
    private Vector encountered_urls;
    /** Urls whose download returned an error. */
    private Vector failed_urls;
    /** The WGet wrapper that spawned this job; used for thread control. */
    private WGet mummy;

    // Job states. Made final: these are constants and must never be reassigned.
    public static final int COMPLETE = 0;
    public static final int PAUSED = 1;
    public static final int RUNNING = 2;
    public static final int STOPPED = 3;

    /** Construct a new download job, creating (and truncating) its log file
     * and its progress bar, and leaving it in the STOPPED state.
     * @param debug pass wget's debug flag through to the native call
     * @param no_parents forbid ascending to parent directories
     * @param other_hosts allow spanning to other hosts
     * @param page_requisites also fetch page requisites (images etc.)
     * @param quiet make the native wget call quiet
     * @param initial the root url to mirror
     * @param depth recursion depth (negative = infinite, 0 = this page only)
     * @param destination directory to download into
     * @param proxy_pass proxy password, or null
     * @param proxy_user proxy user name, or null
     * @param mummy the owning WGet wrapper
     * @param simple passed through to the DownloadProgressBar constructor
     */
    public DownloadJob(boolean debug, boolean no_parents, boolean other_hosts, boolean page_requisites, boolean quiet, URL initial, int depth, String destination, String proxy_pass, String proxy_user, WGet mummy, boolean simple) {
	// this.model = model;

	// One log file per root url; delete any stale log from a previous run
	// and make sure its parent directory exists.
	String log_filename = Utility.getLogDir(null) + "wget" + initial.hashCode() + ".log";
	File log_file = new File(log_filename);
	if(log_file.exists()) {
	    log_file.delete();
	}
	File parent_log_file = log_file.getParentFile();
	parent_log_file.mkdirs();
	parent_log_file = null;
	log_file = null;

	this.debug = debug;
	this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
	this.no_parents = no_parents;
	this.other_hosts = other_hosts;
	this.page_requisites = page_requisites;
	this.quiet = quiet;
	this.initial = new GURL(initial);
	this.depth = depth;
	this.destination = destination;
	this.proxy_pass = proxy_pass;
	this.proxy_user = proxy_user;
	this.mummy = mummy;

	progress = new DownloadProgressBar(this, initial.toString(), simple);

	encountered_urls = new Vector();
	failed_urls = new Vector();

	previous_state = STOPPED;
	state = STOPPED;
    }

    /** Depending on which button on the progress bar was pushed,
     * this method will affect the state of the DownloadJob and perhaps make
     * calls to wget.class if necessary.
     * @param event The ActionEvent fired from within the DownloadProgressBar
     * which we must respond to.
     */
    public void actionPerformed(ActionEvent event) {
	// The stop_start_button is used to alternately start or stop the
	// job. If the current state of the job is paused then this
	// restart is logically equivelent to a resume.
	if(event.getSource() == progress.stop_start_button) {
	    previous_state = state;
	    if (state == RUNNING) {
		state = STOPPED;
	    } else {
		//previous_state = state;
		state = RUNNING;
		mummy.resumeThread();
	    }
	}
	else if (event.getSource() == progress.close_button) {
	    if(state == RUNNING) {
		previous_state = state;
		state = STOPPED; // do we need to do anything else to stop this?
	    }
	    // else {
	    mummy.deleteDownloadJob(this);
	    // }
	}
    }

    /** Called by the WGet native code to inform us of a new download starting.
     * @param raw_url The url that is being downloaded, as a String.
     */
    public void addDownload(String raw_url) {
	if(!encountered_urls.contains(raw_url)) {
	    encountered_urls.add(raw_url);
	}
	// Regardless create a new GURL
	current_url = raw_url;
	url = new GURL(raw_url);
	progress.addDownload(raw_url);
    }

    /** Used to advise the DownloadJob of a newly parsed link. Its up to DownloadJob
     * to decide if it already knows about this url, and if not to
     * update its progress bar.
     * @param raw_url The url in question as a String.
     * @param type Whether the link is an internal or external link.
     * @return A boolean indicating if the url was added.
     */
    public boolean addLink(String raw_url, int type) {
	///ystem.out.println("addLink("+url+", "+type+")");
	if(!encountered_urls.contains(raw_url)) {
	    // Add it to the urls we've seen.
	    encountered_urls.add(raw_url);
	    // Add it the to links for the current GURL.
	    // Add it to the progress file count.
	    progress.increaseFileCount();
	    return true;
	}
	// Regardless add it to the children links of the current GURL
	// NOTE(review): this line is only reached for already-seen urls,
	// despite the "Regardless" comment — confirm whether new urls should
	// also be added as children before "fixing" the early return above.
	initial.addLink(raw_url);

	// We've seen it before. Don't count it again.
	return false;
    }

    /** Build the wget command line from this job's options (recursion depth,
     * parents/hosts/requisites flags, proxy configuration), spawn the external
     * wget process, and parse its stderr line-by-line to drive progress
     * updates until the process exits or the user stops the job.
     */
    public void callWGet() {
	// Build parameter string. Note that we never clobber, and we continue if possible
	// want to always download newer files, convert non-relative links to relative, always use directories, and only try twice to get a file before giving up
	String command = Configuration.getWGetPath() + " -N -k -x -t 2 "; // + " -nc -c ";

	if (no_parents) {
	    command = command + "-np ";
	}
	if(depth < 0) {
	    // Infinite recursion
	    command = command + "-r ";
	}
	else if (depth == 0) {
	    // Just this page.
	}
	else if (depth > 0) {
	    // Recursion to the specified depth.
	    command = command + "-r -l" + depth + " ";
	}

	String proxy_url = "";
	// Determine if we have to use a proxy.
	if(Configuration.get("general.use_proxy", true)) {
	    String proxy_host = Configuration.getString("general.proxy_host", true);
	    String proxy_port = Configuration.getString("general.proxy_port", true);
	    // Find out whether the user has already authenticated themselves
	    String user_pass = null;
	    String address = proxy_host + ":" + proxy_port;
	    // Give the user up to three chances to authenticate.
	    // NOTE(review): credentials entered on the final prompt are never
	    // re-read because the && short-circuits once count reaches 3 —
	    // confirm whether that is intentional.
	    int count = 0;
	    while(count < 3 && (user_pass = (String) Gatherer.authentications.get(address)) == null) {
		Authenticator.requestPasswordAuthentication(proxy_host, null, Integer.parseInt(proxy_port), "http://", Dictionary.get("WGet.Prompt"), "HTTP");
		count++;
	    }
	    if(count >= 3) {
		state = STOPPED;
		return;
	    }
	    if(user_pass.indexOf("@") != -1) {
		// Write the use proxy command - we don't do this anymore, instead we set environment variables - hopefully these can't be spied on like the follwoing can (using ps) - actually the environment stuff didn't work for windows, so lets go back to this
		if (Utility.isWindows()) {
		    command = command + "-e httpproxy=" + proxy_host + ":" + proxy_port + "/ --proxy-user=" + user_pass.substring(0, user_pass.indexOf("@")) + " --proxy-passwd=" + user_pass.substring(user_pass.indexOf("@") + 1) + " -Y on ";
		}
		else {
		    String user_name = user_pass.substring(0, user_pass.indexOf("@"));
		    String user_pwd = user_pass.substring(user_pass.indexOf("@") + 1);
		    proxy_url = user_name+":"+user_pwd+"@"+proxy_host+":"+proxy_port+"/";
		}
	    }
	    else {
		DebugStream.println("Unknown user/pass");
	    }
	}

	// The user can choose to mirror all of the page requisites...
	if(page_requisites) {
	    command = command + "-p ";
	}

	// Download files from other hosts
	if(other_hosts) {
	    command = command + "-H ";
	}

	// Finally tell it the site to download.
	command = command + initial.toString();

	if(previous_state == DownloadJob.COMPLETE) {
	    progress.mirrorBegun(true, true);
	}
	else {
	    progress.mirrorBegun(false, true);
	}

	// Make sure the destination directory exists before wget runs in it.
	File dest_file = new File(destination);
	if (!dest_file.exists()) {
	    dest_file.mkdirs();
	}

	// Run it
	try {
	    //DebugStream.println("Cmd: " + command); // don't print it out cos it may have the password in it
	    Runtime rt = Runtime.getRuntime();
	    // Pass the proxy (with credentials) via the environment on
	    // non-windows platforms, so it doesn't show up in ps output.
	    String [] env = null;
	    if (!proxy_url.equals("")) {
		env = new String[2];
		env[0] = "http_proxy=http://"+proxy_url;
		env[1] = "ftp_proxy=ftp://"+proxy_url;
	    }
	    Process prcs = rt.exec(command, env, dest_file);
	    BufferedReader br = new BufferedReader(new InputStreamReader(prcs.getErrorStream()));
	    try {
		// Capture the standard error stream and seach for two particular occurances.
		// NOTE(review): wget's progress messages appear on stderr;
		// stdout is never drained here — confirm wget produces no
		// stdout with these flags, or the process could block.
		String line;
		boolean ignore_for_robots = false;
		while ((line = br.readLine()) != null && state != STOPPED) {
		    DebugStream.println(line);
		    download_log.appendLine(line);
		    // The first magic special test is to see if we've just
		    // asked for the robots.txt file. If so we ignore
		    // the next add and then the next complete/error.
		    if(line.lastIndexOf("robots.txt;") != -1) {
			DebugStream.println("***** Requesting robot.txt");
			ignore_for_robots = true;
		    }
		    // If line contains "=> `" display text as the
		    // currently downloading url. Unique to add download.
		    else if(line.lastIndexOf("=> `") != -1) {
			if(!ignore_for_robots) {
			    // Add download
			    String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
			    addDownload("http:/" + new_url);
			}
		    }
		    // If line contains "/s) - `" set currently
		    // downloading url to "Download Complete".
		    else if(line.lastIndexOf("/s) - `") != -1) {
			String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
			if(!ignore_for_robots) {
			    DebugStream.println("Not ignore for robots");
			    // Download complete
			    downloadComplete(current_file_downloading);
			}
			else {
			    DebugStream.println("Ignore for robots");
			    ignore_for_robots = false;
			}
		    }
		    // The already there line begins "File `..." However this
		    // is only true in english, so instead I looked and there
		    // are few (if any at all) other messages than those above
		    // and not overwriting messages that use " `" so we'll
		    // look for that. Note this method is not guarenteed to be
		    // unique like the previous two.
		    else if(line.lastIndexOf(" `") != -1) {
			// Not Overwriting
			DebugStream.println("Already there.");
			String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
			addDownload("http:/" + new_url);
			downloadWarning();
		    }
		    // Any other important message starts with the time in the form hh:mm:ss
		    else if(line.length() > 7) {
			if(line.charAt(2) == ':' && line.charAt(5) == ':') {
			    if(!ignore_for_robots) {
				DebugStream.println("Error.");
				downloadFailed();
			    }
			    else {
				ignore_for_robots = false;
			    }
			}
		    }
		}
		if(state == STOPPED) {
		    prcs.destroy(); // This doesn't always work, but it's worth a try
		}
		else {
		    // Now display final message based on exit value
		    prcs.waitFor();
		}
	    }
	    finally {
		// Fixed leak: previously the reader was only closed on the
		// STOPPED path. Closing br also closes the underlying stream.
		br.close();
	    }
	}
	catch (Exception ioe) {
	    //message(Utility.ERROR, ioe.toString());
	    DebugStream.printStackTrace(ioe);
	}
	// If we've got to here and the state isn't STOPPED then the
	// job is complete.
	if(state == DownloadJob.RUNNING) {
	    progress.mirrorComplete();
	    previous_state = state;
	    state = DownloadJob.COMPLETE;
	}
	// refresh the workspace tree
	Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
    }

    /** The most important part of the DownloadJob class, this method is
     * responsible for calling the WGet native methods used to
     * mirror the indicated url. By this stage all the variables
     * necessary should be set and we need only build up the
     * parameter string and make the call.
     */
    public void callWGetNative() {
	Vector args = new Vector();

	// Let the DownloadProgressBar know we're starting, just in case
	// the user hasn't told us to. If this is the second time the
	// urls downloaded and the first attempt was successful (ie
	// the previous job was complete), then we have the case where
	// the user is forcing us to remirror. Reset all the values etc
	// if this is the case then reset the variables.
	// Note that this can cause the result line to look something
	// like this.
	// Downloaded 12 of 12 files (8 warnings, 0 errors).
	// The warnings would be something like, 'File already downloaded'
	// but the total number of files and the file successfully
	// downloaded will be correct.
	if(previous_state == DownloadJob.COMPLETE) {
	    progress.mirrorBegun(true, false);
	}
	else {
	    progress.mirrorBegun(false, false);
	}

	// Parse arguments into array.
	args.add(Utility.BASE_DIR + "wget");
	args.add("-d");
	args.add("-o");
	args.add("debug.txt");
	if(destination != null) {
	    args.add("-P");
	    args.add(destination);
	}
	if(depth < 0) {
	    // Infinite recursion
	    args.add("-r");
	}
	else if (depth == 0) {
	    // Just this page.
	}
	else if (depth > 0) {
	    // Recursion to the specified depth.
	    args.add("-r");
	    args.add("-l");
	    args.add("" + depth + ""); // Hacky
	}
	if(previous_state == PAUSED) {
	    args.add("-nc");
	    args.add("-c");
	}
	if(proxy_user != null) {
	    args.add("--proxy-user=" + proxy_user);
	    args.add("--proxy-passwd=" + proxy_pass);
	}
	if(page_requisites) {
	    args.add("-p");
	}
	if(quiet) {
	    args.add("-q");
	}
	if(other_hosts) {
	    args.add("-H");
	}
	args.add(initial.toString());

	DebugStream.println("Calling wget ");
	for(Enumeration e = args.elements(); e.hasMoreElements();) {
	    DebugStream.println(e.nextElement() + " ");
	}
	DebugStream.println("");

	// Run home to mummy.
	int value = mummy.wget(args.size(), args.toArray(), debug);

	// If we've got to here and the state isn't STOPPED then the job is complete.
	if(state == RUNNING) {
	    progress.mirrorComplete();
	    previous_state = state;
	    state = COMPLETE;
	}
    }

    /** Called by the WGet native code when the current download is
     * completed. In turn all download listeners are informed.
     */
    public void downloadComplete() {
	progress.downloadComplete();
	url = null;
	current_url = null;
    }

    /** Marks the named file's download as complete on the progress bar.
     * @param current_file_downloading the local path of the completed file.
     */
    public void downloadComplete(String current_file_downloading) {
	progress.downloadComplete();
	DebugStream.println("Current File: " + current_file_downloading);
	// !! TEMPORARILY DISABLED !!
	// The workspace-tree cache-mapping refresh that used to live here
	// (WorkspaceTreeModel.refreshWebCacheMappings() and the manual
	// FileNode insertion into the workspace tree) has been disabled;
	// see version history for the removed implementation.
	//WorkspaceTreeModel.refreshWebCacheMappings();
    }

    /** Called by the WGet native code when the requested download returns
     * a status code other than 200.
     */
    public void downloadFailed() {
	///ystem.out.println("downloadFailed("+current_url+")");
	failed_urls.add(current_url); // Its the current url thats failed.
	progress.downloadFailed();
    }

    /** Records a warning (e.g. file already downloaded) on the progress bar. */
    public void downloadWarning() {
	progress.downloadWarning();
    }

    /**
     * @return A String representing the initial urls host (root node
     * of tree that we are mirroring).
     */
    public String getHost() {
	return url.getHost();
    }

    /** @return the log document that collects this job's wget output. */
    public AppendLineOnlyFileDocument getLogDocument() {
	return download_log;
    }

    /**
     * @return Returns the progress bar associated with this job.
     */
    public DownloadProgressBar getProgressBar() {
	return progress;
    }

    /** Called to discover if the user wanted this thread to run or if
     * it is paused.
     * @return An int representing the current DownloadJob state.
     */
    public int getState() {
	return state;
    }

    /** Returns the current state of the stop flag for this job.
     * @return A boolean representing whether the user has requested to
     * stop.
     */
    public boolean hasSignalledStop() {
	return state == DownloadJob.STOPPED || state == DownloadJob.PAUSED || state == DownloadJob.COMPLETE;
    }

    /** Sets the job state, remembering the old state as previous_state.
     * @param state the new state; one of COMPLETE, PAUSED, RUNNING, STOPPED.
     */
    public void setState(int state) {
	previous_state = this.state;
	this.state = state;
    }

    /** A convinence call.
     * @return A String representing the url of the initial url (root node of the mirrored tree).
     */
    public String toString() {
	return initial.toString();
    }

    /** Called by the WGet native code to signal the current progress of
     * downloading.
     * @param current A long representing the number of bytes that have
     * been downloaded since the last update.
     * @param expected A long representing the total number of bytes
     * expected for this download.
     */
    public void updateProgress(long current, long expected) {
	progress.updateProgress(current, expected);
    }
}