Changeset 7658


Ignore:
Timestamp:
2004-06-29T14:54:18+12:00 (20 years ago)
Author:
kjdon
Message:

Renamed collection/Job to collection/DownloadJob. Also fixed a bug in the download process. Previously we were giving the full path to wget as a prefix - this causes a bug if we try to redownload the same site again, because internal links get converted to full paths. Instead, we run the process in a particular directory, so wget doesn't get given a prefix and just puts things in the current working directory with relative URLs.

Location:
trunk/gli/src/org/greenstone/gatherer
Files:
1 added
1 deleted
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/WGet.java

    r6770 r7658  
    4444import javax.swing.tree.*;
    4545import org.greenstone.gatherer.Gatherer;
    46 import org.greenstone.gatherer.collection.Job;
     46import org.greenstone.gatherer.collection.DownloadJob;
    4747
    4848/** This class provides access to the functionality of the WGet program, either by calling it via a shell script or by the JNI. It maintains a queue of pending jobs, and the component for showing these tasks to the user.
     
    6464    private JPanel list_pane;
    6565    /** The job currently underway. */
    66     private Job job;
     66    private DownloadJob job;
    6767    /** A scroll pane which will be used to display the list of pending tasks. */
    6868    private JScrollPane list_scroll;
     
    9292    /** Called by the WGet native code to inform us of a new download starting within the given job.
    9393     * @param url The url that is being downloaded, as a <strong>String</strong>.
    94      * @see org.greenstone.gatherer.collection.Job
     94     * @see org.greenstone.gatherer.collection.DownloadJob
    9595     */
    9696    public synchronized void addDownload(String url) {
     
    9898    }
    9999
    100     /* Used to advise the Job of a newly parsed link. Its up to Job to decide if it already knows about this url, and if not to update its progress bar.
     100    /* Used to advise the DownloadJob of a newly parsed link. Its up to DownloadJob to decide if it already knows about this url, and if not to update its progress bar.
    101101     * @param url The url in question as a <strong>String</strong>.
    102102     * @param type <i>true</i> if this is an internal link, <i>false</i> for an external one.
    103103     * @return A <i>boolean</i> with a value of <i>true</i> indicating if the url was added, <i>false</i> otherwise.
    104      * @see org.greenstone.gatherer.collection.Job
     104     * @see org.greenstone.gatherer.collection.DownloadJob
    105105     */
    106106    public synchronized boolean addLink(String url, int type) {
     
    167167     * wait until the native job has cleanly exited before removing
    168168     * the job.
    169      * @param delete_me The <strong>Job</strong> that is to be deleted.
    170      */
    171     public void deleteJob(Job delete_me) {
     169     * @param delete_me The <strong>DownloadJob</strong> that is to be deleted.
     170     */
     171    public void deleteDownloadJob(DownloadJob delete_me) {
    172172    if (delete_me == job) {
    173173        // While this seems wasteful its only for the briefest moment.
     
    202202     * for the indicated job, is completed. In turn all download
    203203     * listeners are informed.
    204      * @see org.greenstone.gatherer.collection.Job
     204     * @see org.greenstone.gatherer.collection.DownloadJob
    205205     */
    206206    public synchronized void downloadComplete() {
     
    211211     * a status code other than 200-399 for the specified download job.
    212212     * for.
    213      * @see org.greenstone.gatherer.collection.Job
     213     * @see org.greenstone.gatherer.collection.DownloadJob
    214214     */
    215215    public synchronized void downloadFailed() {
     
    222222     * be downloaded as doing so would clobber an existing file and the -nc
    223223     * flag is set.
    224      * @see org.greenstone.gatherer.collection.Job
     224     * @see org.greenstone.gatherer.collection.DownloadJob
    225225     */
    226226    public synchronized void downloadWarning() {
     
    231231     * scroll pane containing the current list of jobs.
    232232     */
    233     public JScrollPane getJobList() {
     233    public JScrollPane getDownloadJobList() {
    234234    return list_scroll;
    235235    }
     
    238238     * @return A boolean representing whether the user has requested to
    239239     * stop.
    240      * @see org.greenstone.gatherer.collection.Job
     240     * @see org.greenstone.gatherer.collection.DownloadJob
    241241     */
    242242    public synchronized boolean hasSignalledStop() {
     
    253253     * @see org.greenstone.gatherer.Configuration
    254254     * @see org.greenstone.gatherer.Gatherer
    255      * @see org.greenstone.gatherer.collection.Job
    256      * @see org.greenstone.gatherer.gui.GProgressBar
     255     * @see org.greenstone.gatherer.collection.DownloadJob
     256     * @see org.greenstone.gatherer.gui.DownloadProgressBar
    257257     * @see org.greenstone.gatherer.util.GURL
    258258     */
    259     public void newJob(boolean no_parents, boolean other_hosts, boolean page_requisites, URL url, int depth, String destination) {
     259    public void newDownloadJob(boolean no_parents, boolean other_hosts, boolean page_requisites, URL url, int depth, String destination) {
    260260    // Create the job and fill in the details from gatherer.config.
    261261    Gatherer.println("About to create a new job");
    262     // If it was decided not to download page requisites, then create the destination by basing it on the given destination, but appending the url host and paths. If page requisites is used then WGet will do this for us
    263     if(!page_requisites && depth == 0) {
    264         Gatherer.println("Initially destination: " + destination);
    265         String almost_the_entire_path = url.getPath();
    266         int index = -1;
    267         if((index = almost_the_entire_path.lastIndexOf("/")) != -1) {
    268         almost_the_entire_path = almost_the_entire_path.substring(0, index);
    269         }
    270         destination = destination + url.getHost() + almost_the_entire_path;
    271         Gatherer.println("Final destination:    " + destination);
    272     }
    273     Job new_job = new Job(Gatherer.config.get("mirroring.debug", false), no_parents, other_hosts, page_requisites, Gatherer.config.get("mirroring.quiet", false), url, depth, destination, Gatherer.config.proxy_pass, Gatherer.config.proxy_user, this, simple);
     262
     263    DownloadJob new_job = new DownloadJob(Gatherer.config.get("mirroring.debug", false), no_parents, other_hosts, page_requisites, Gatherer.config.get("mirroring.quiet", false), url, depth, destination, Gatherer.config.proxy_pass, Gatherer.config.proxy_user, this, simple);
     264
    274265    // Tell it to run as soon as possible
    275     new_job.setState(Job.RUNNING);
     266    new_job.setState(DownloadJob.RUNNING);
    276267
    277268    // Add to job_queue job list.
     
    303294     * @param expected A long representing the total number of bytes
    304295     * expected for this download.
    305      * @see org.greenstone.gatherer.collection.Job
     296     * @see org.greenstone.gatherer.collection.DownloadJob
    306297     */
    307298    public synchronized void updateProgress(long current, long expected) {
     
    324315     * waiting list.
    325316     * @see org.greenstone.gatherer.Gatherer
    326      * @see org.greenstone.gatherer.collection.Job
     317     * @see org.greenstone.gatherer.collection.DownloadJob
    327318     */
    328319    public void run() {
     
    333324        while(job_queue.size() > 0 && index < job_queue.size()) {
    334325            // Get the first job that isn't stopped.
    335             job = (Job) job_queue.get(index);
    336             if(job.getState() == Job.RUNNING) {
    337             Gatherer.println("Job " + job.toString() + " Begun.");
     326            job = (DownloadJob) job_queue.get(index);
     327            if(job.getState() == DownloadJob.RUNNING) {
     328            Gatherer.println("DownloadJob " + job.toString() + " Begun.");
    338329            // A lock to prevent us deleting this job while its being
    339330            // run, unless you want things to go really wrong.
     
    346337            }
    347338            busy = false;
    348             Gatherer.println("Job " + job.toString() + " complete.");
     339            Gatherer.println("DownloadJob " + job.toString() + " complete.");
    349340            // And if the user has requested that complete jobs
    350341                // be removed, then remove it from the list.
    351             //deleteJob(job);
     342            //deleteDownloadJob(job);
    352343            job = null;
    353344            }
     
    361352        try {
    362353            synchronized(this) {
    363             Gatherer.println("WGet thread is waiting for Jobs.");
     354            Gatherer.println("WGet thread is waiting for DownloadJobs.");
    364355            wait();
    365356            }
Note: See TracChangeset for help on using the changeset viewer.