/** *######################################################################### * * A component of the Gatherer application, part of the Greenstone digital * library suite from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * *

* * Author: John Thompson, Greenstone Digital Library, University of Waikato * *

* * Copyright (C) 1999 New Zealand Digital Library Project * *

* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * *

* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * *

* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *######################################################################## */
package org.greenstone.gatherer;

import java.awt.*;
import java.io.*;
import java.net.*;
import java.util.*;
import javax.swing.*;
import javax.swing.tree.*;
import org.greenstone.gatherer.Gatherer;
import org.greenstone.gatherer.collection.Job;

/** This class provides access to the functionality of the WGet program, either by calling it via a shell script or by the JNI. It maintains a queue of pending jobs, and the component for showing these tasks to the user.
 * <p>
 * Threading: the download loop in {@link #run()} executes on this thread, while {@link #newJob} and {@link #deleteJob} may be invoked from other threads. The fields shared between them without a lock ({@code busy}, {@code job}) are therefore declared volatile.
 * @author John Thompson, Greenstone Digital Library, University of Waikato
 * @version 2.3
 */
public class WGet
    extends Thread {

    /** How long, in milliseconds, the download thread sleeps between scans when the queue holds jobs but none of them is in the RUNNING state. A notify() from newJob() or resumeThread() ends the sleep early. */
    static final private long IDLE_POLL_MILLIS = 1000L;

    /** true if there is a task currently being carried out, false otherwise. Volatile because deleteJob() polls it from a different thread than the one that writes it. */
    private volatile boolean busy = false;
    /** true if verbose debug messages should be displayed, false otherwise. */
    private boolean debug = false;
    /** true if successfully completed tasks should be automatically removed from the job queue. */
    private boolean remove_complete_jobs = true;
    /** An empty panel added below the progress bars to take up the unused height of the list pane. */
    private JPanel filler_pane = null;
    /** The panel that the task list will be shown in. */
    private JPanel list_pane;
    /** The job most recently examined by the download thread; while busy is true this is the job underway. Cleared by deleteJob() and after each completed run. */
    private volatile Job job;
    /** A scroll pane which will be used to display the list of pending tasks. */
    private JScrollPane list_scroll;
    /** A queue of download tasks (elements are Job objects). */
    private Vector job_queue;
    /** A static flag used to switch between simple and advanced modes. If true the Process object is used to externally call the Wget program. If false the native WGet libraries are statically loaded and the JNI used to download directly. */
    static final private boolean simple = true;

    /** Load the WGet native library (only needed when not in simple mode). */
    static {
        if (!simple) {
            System.load(System.getProperty("user.dir") + File.separator + "libgatherer.so");
        }
    }

    /** Constructor. Creates the empty job queue and the Swing components used to display it. */
    public WGet() {
        job = null;
        job_queue = new Vector();
        filler_pane = new JPanel();
        list_pane = new JPanel();
        list_pane.setLayout(new BoxLayout(list_pane, BoxLayout.Y_AXIS));
        //list_pane.setLayout(new GridLayout(height_count,1));
        list_scroll = new JScrollPane(list_pane);
        //list_scroll.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
    }

    /** Called by the WGet native code to inform us of a new download starting within the given job.
     * @param url The url that is being downloaded, as a String.
     * @see org.greenstone.gatherer.collection.Job
     */
    public synchronized void addDownload(String url) {
        job.addDownload(url);
    }

    /** Used to advise the Job of a newly parsed link. It is up to Job to decide if it already knows about this url, and if not to update its progress bar.
     * @param url The url in question as a String.
     * @param type The link type code, forwarded unchanged to Job.addLink(). NOTE(review): earlier documentation described this as distinguishing internal from external links - confirm the encoding against Job.
     * @return A boolean with a value of true indicating if the url was added, false otherwise.
     * @see org.greenstone.gatherer.collection.Job
     */
    public synchronized boolean addLink(String url, int type) {
        return job.addLink(url, type);
    }

    // ---------------------------------------------------------------------
    // Disabled code, retained for reference only.
    //
    // Whenever files are moved into or out of the collection we need to
    // run convertLinks on the files remaining. This ensures that we have
    // the most efficient balance between local and absolute links.
    // @param records A Vector containing information about the files in
    // the current collection. From these url and file information is
    // harvested.
    //
    // private void convertLinks() {
    //     Vector args = new Vector();
    //     Vector files = new Vector();
    //     Vector urls = new Vector();
    //     // Args - there ain't many
    //     args.add("-d");
    //     args.add("-o");
    //     args.add("debug.txt");
    //     args.add("-P");
    //     args.add("/tmp/");
    //     // Downloaded urls (two entries for each record). The pattern here is:
    //     //   local file
    //     //   original url of file
    //     for(Enumeration e = records.elements(); e.hasMoreElements();) {
    //         urls.add(((GURL)e).getLocalName());
    //         urls.add(((GURL)e).getURL());
    //     }
    //     //urls.add("/tmp//www.cs.waikato.ac.nz/index.html");
    //     //urls.add("http://www.cs.waikato.ac.nz/index.html");
    //     //urls.add("/tmp//www.cs.waikato.ac.nz/events.html");
    //     //urls.add("http://www.cs.waikato.ac.nz/events.html");
    //     //urls.add("/tmp//www.cs.waikato.ac.nz/history.html");
    //     //urls.add("http://www.cs.waikato.ac.nz/history.html");
    //     //urls.add("/tmp//www.cs.waikato.ac.nz/icons/cs_title_logo.gif");
    //     //urls.add("http://www.cs.waikato.ac.nz/icons/cs_title_logo.gif");
    //     //urls.add("/tmp//www.cs.waikato.ac.nz/icons/scms_title_logo.gif");
    //     //urls.add("http://www.cs.waikato.ac.nz/icons/scms_title_logo.gif");
    //     // Downloaded files (html only). We race back through our records
    //     // looking for html/text content ones.
    //     for(Enumeration e = records.elements(); e.hasMoreElements(); ) {
    //         GURL record = (GURL)e;
    //         if(record.isHTML()) {
    //             files.add(record.getLocalName());
    //         }
    //     }
    //     //files.add("/tmp//www.cs.waikato.ac.nz/index.html");
    //     //wren(args.size(), args.toArray(), urls.size(), urls.toArray(),
    //     //     files.size(), files.toArray());
    // }
    // ---------------------------------------------------------------------

    /** This method is called to delete a certain job from the queue.
     * This job may be pending, complete or even in progress. However
     * if it is currently downloading then the delete method must
     * wait until the download call has cleanly exited before removing
     * the job. A job is only removed once it reports hasSignalledStop();
     * otherwise a diagnostic message is printed and nothing is removed.
     * @param delete_me The Job that is to be deleted. A null value is ignored.
     */
    public void deleteJob(Job delete_me) {
        if (delete_me == null) {
            return;
        }
        if (delete_me == job) {
            // Spin until the download thread has left the job. busy is
            // volatile so the change made by the download thread is seen
            // here; yielding avoids monopolising a core while we wait.
            while (busy) {
                Thread.yield();
            }
            job = null;
        }
        if (delete_me.hasSignalledStop()) {
            list_pane.remove(delete_me.getProgressBar());
            job_queue.remove(delete_me);
            // Removing a task means the filler has to be resized to cover
            // the space that was freed.
            list_pane.remove(filler_pane);
            if (job_queue.size() > 0) {
                restoreFiller(delete_me.getProgressBar().getPreferredSize().height);
            }
            list_pane.updateUI();
        }
        else {
            Gatherer.println("Somehow we're trying to delete a job that is still running.");
        }
    }

    /** Called by the WGet native code when the current download,
     * for the indicated job, is completed. In turn all download
     * listeners are informed.
     * @see org.greenstone.gatherer.collection.Job
     */
    public synchronized void downloadComplete() {
        job.downloadComplete();
    }

    /** Called by the WGet native code when the requested download returns
     * a status code other than 200-399 for the specified download job.
     * @see org.greenstone.gatherer.collection.Job
     */
    public synchronized void downloadFailed() {
        // Add the failed download as a new job if the user so requests.
        job.downloadFailed();
    }

    /** Called by the WGet native code when some non-fatal error has caused
     * a download to fail. An example of a warning would be if a file can't
     * be downloaded as doing so would clobber an existing file and the -nc
     * flag is set.
     * @see org.greenstone.gatherer.collection.Job
     */
    public synchronized void downloadWarning() {
        job.downloadWarning();
    }

    /** Used by other graphic functions to get a reference to the
     * scroll pane containing the current list of jobs.
     * @return The JScrollPane wrapping the job list panel.
     */
    public JScrollPane getJobList() {
        return list_scroll;
    }

    /** Returns the current state of the stop flag for the current job.
     * @return A boolean representing whether the user has requested to
     * stop.
     * @see org.greenstone.gatherer.collection.Job
     */
    public synchronized boolean hasSignalledStop() {
        return job.hasSignalledStop();
    }

    /** Creates a new mirroring job on the queue, adds its progress bar to the job list and wakes the download thread. The debug and quiet flags and the proxy credentials are read from Gatherer.config; everything else must be supplied by the caller.
     * @param model the tree model that any new records should be added to
     * @param overwrite passed through to the Job constructor
     * @param no_parents passed through to the Job constructor
     * @param other_hosts passed through to the Job constructor
     * @param page_requisites passed through to the Job constructor; when false the url's host name is appended to destination here, as WGet only does so itself when page requisites are requested
     * @param url a URL which points to the root url for the mirroring
     * @param depth passed through to the Job constructor
     * @param destination the destination file as a String
     * @see org.greenstone.gatherer.Configuration
     * @see org.greenstone.gatherer.Gatherer
     * @see org.greenstone.gatherer.collection.Job
     * @see org.greenstone.gatherer.gui.GProgressBar
     * @see org.greenstone.gatherer.util.GURL
     */
    public void newJob(TreeModel model, boolean overwrite, boolean no_parents, boolean other_hosts, boolean page_requisites, URL url, int depth, String destination) {
        // Create the job and fill in the details from gatherer.config.
        Gatherer.println("About to create a new job");
        // If it was decided not to download page requisites, then create the destination by basing it on the given destination, but appending the url host. If page requisites is used then WGet will do this for us
        if (!page_requisites) {
            destination = destination + url.getHost();
        }
        Job new_job = new Job(model, overwrite, Gatherer.config.get("mirroring.debug", false), no_parents, other_hosts, page_requisites, Gatherer.config.get("mirroring.quiet", false), url, depth, destination, Gatherer.config.proxy_pass, Gatherer.config.proxy_user, this, simple);
        // Add to job_queue job list.
        job_queue.add(new_job);
        // Now add it to the visual component, job list. The filler is taken
        // out first so that the new progress bar lands above it.
        list_pane.remove(filler_pane);
        int bar_height = new_job.getProgressBar().getPreferredSize().height;
        list_pane.add(new_job.getProgressBar());
        restoreFiller(bar_height);
        //list_pane.setAlignmentX(Component.LEFT_ALIGNMENT);
        list_pane.updateUI();
        synchronized (this) {
            notify(); // Just in case the download thread is sleeping.
        }
    }

    /** Called by the WGet native code to signal the current progress of
     * downloading for the specified job.
     * @param current A long representing the number of bytes that have
     * been downloaded since the last update.
     * @param expected A long representing the total number of bytes
     * expected for this download.
     * @see org.greenstone.gatherer.collection.Job
     */
    public synchronized void updateProgress(long current, long expected) {
        job.updateProgress(current, expected);
    }

    /** There may be times when the download thread is sleeping, but the
     * user has indicated that a previously paused job should now begin
     * again. The flag within the job will change, so we tell the thread
     * to start again.
     */
    public void resumeThread() {
        synchronized (this) {
            notify(); // Just in case the download thread is sleeping.
        }
    }

    /** This begins the WGet thread, which simply iterates through the waiting
     * jobs attempting each one. Completed downloads are handed to deleteJob().
     * Whenever a full pass over the queue finds nothing to run the thread
     * sleeps until it is notified (or, if jobs are queued but none is
     * runnable, until a short poll interval elapses). This method never returns.
     * @see org.greenstone.gatherer.Gatherer
     * @see org.greenstone.gatherer.collection.Job
     */
    public void run() {
        while (true) {
            if (!runPendingJobs()) {
                // In order to save processor time, suspend the thread
                // if there is no advantage to it running.
                waitForWork();
            }
        }
    }

    /** Makes one pass over the job queue, executing every job found in the RUNNING state.
     * @return true if at least one job was executed during this pass, false otherwise.
     */
    private boolean runPendingJobs() {
        boolean ran_job = false;
        int index = 0;
        Job current = jobAt(index);
        while (current != null) {
            job = current;
            if (current.getState() == Job.RUNNING) {
                Gatherer.println("Job " + current.toString() + " Begun.");
                // A lock to prevent us deleting this job while its being
                // run, unless you want things to go really wrong.
                busy = true;
                try {
                    if (simple) {
                        current.callWGet();
                    }
                    else {
                        current.callWGetNative();
                    }
                }
                finally {
                    // Always released, otherwise a failed download would
                    // leave deleteJob() spinning forever.
                    busy = false;
                }
                // The local reference is used from here on: deleteJob() on
                // another thread may already have cleared the job field.
                Gatherer.println("Job " + current.toString() + " complete.");
                // And if the user has requested that complete jobs
                // be removed, then remove it from the list.
                if (remove_complete_jobs) {
                    deleteJob(current);
                }
                job = null;
                ran_job = true;
            }
            // Only step forward if this job still occupies the slot. If it
            // was removed, the following job has shifted down into it.
            Job next = jobAt(index);
            if (next == current) {
                index++;
                next = jobAt(index);
            }
            current = next;
        }
        return ran_job;
    }

    /** Fetches the job at the given queue position, treating the bounds check and the read as a single atomic step with respect to other users of the queue.
     * @param index The position in the queue.
     * @return The Job at that position, or null if the index is past the end of the queue.
     */
    private Job jobAt(int index) {
        synchronized (job_queue) {
            if (index < job_queue.size()) {
                return (Job) job_queue.get(index);
            }
            return null;
        }
    }

    /** Suspends the download thread until there may be something to do. With an empty queue it waits for a notify() from newJob() or resumeThread(); the emptiness check is made while holding the monitor so a notification sent in between cannot be missed. With queued jobs of which none is runnable it waits at most IDLE_POLL_MILLIS before the queue is scanned again. */
    private void waitForWork() {
        try {
            synchronized (this) {
                if (job_queue.isEmpty()) {
                    Gatherer.println("WGet thread is waiting for Jobs.");
                    wait();
                }
                else {
                    wait(IDLE_POLL_MILLIS);
                }
            }
        }
        catch (InterruptedException e) {
            // Time to get going again. An interrupt is treated as just
            // another wake-up, so the flag is deliberately not restored.
        }
    }

    /** Sizes the filler panel to the height of the list pane not covered by progress bars and appends it to the list pane. Nothing is added when there is no spare height.
     * @param bar_height The preferred height of a single progress bar, in pixels.
     */
    private void restoreFiller(int bar_height) {
        Dimension list_pane_size = list_pane.getSize();
        int height = list_pane_size.height - (job_queue.size() * bar_height);
        if (height > 0) {
            filler_pane.setPreferredSize(new Dimension(list_pane_size.width, height));
            list_pane.add(filler_pane);
        }
    }

    /** Link to the call to the WGet Native method for downloading.
     * @param argc An int representing the number of elements in argv.
     * @param argv An array of objects passed as arguments to wget.
     * @param debug Whether the native code should show java-only debug
     * messages.
     */
    public native int wget(int argc, Object argv[], boolean debug);

    /** Link to the call to the WGet Native method for converting url links.
     * @param argc An int representing the number of object elements in argv.
     * @param argv An array of objects passed as arguments to wget.
     * @param urlc An int representing the number of url string elements in urlv.
     * @param urlv An array of strings representing urls.
     * @param filec An int representing the number of file string elements in filev.
     * @param filev An array of strings representing files.
     */
    public native void wren(int argc, Object argv[], int urlc, Object urlv[], int filec, Object filev[]);
}