source: trunk/gli/src/org/greenstone/gatherer/WGet.java@ 8236

Last change on this file since 8236 was 8236, checked in by mdewsnip, 20 years ago

Replaced all Gatherer.print* with DebugStream.print*.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.7 KB
Line 
/**
 *#########################################################################
 *
 * A component of the Gatherer application, part of the Greenstone digital
 * library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * <BR><BR>
 *
 * Author: John Thompson, Greenstone Digital Library, University of Waikato
 *
 * <BR><BR>
 *
 * Copyright (C) 1999 New Zealand Digital Library Project
 *
 * <BR><BR>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * <BR><BR>
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * <BR><BR>
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *########################################################################
 */
37package org.greenstone.gatherer;
38
39import java.awt.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.*;
44import javax.swing.tree.*;
45import org.greenstone.gatherer.Gatherer;
46import org.greenstone.gatherer.collection.DownloadJob;
47
48/** This class provides access to the functionality of the WGet program, either by calling it via a shell script or by the JNI. It maintains a queue of pending jobs, and the component for showing these tasks to the user.
49 * @author John Thompson, Greenstone Digital Library, University of Waikato
50 * @version 2.3
51 */
52public class WGet
53 extends Thread {
54
55 /** <i>true</i> if there is a task currently being carried out, <i>false</i> otherwise. */
56 private boolean busy = false;
57 /** <i>true</i> if verbose debug messages should be displayed, <i>false</i> otherwise. */
58 private boolean debug = false;
59 /** <i>true</i> if successfully completed tasks should be automatically removed from the job queue. */
60 private boolean remove_complete_jobs = true;
61
62 private JPanel filler_pane = null;
63 /** The panel that the task list will be shown in. */
64 private JPanel list_pane;
65 /** The job currently underway. */
66 private DownloadJob job;
67 /** A scroll pane which will be used to display the list of pending tasks. */
68 private JScrollPane list_scroll;
69 /** A queue of download tasks. */
70 private Vector job_queue;
71 /** A static flag used to switch between simple and advanced modes. If <i>true</i> the Process object is used to externally call the Wget program. If <i>false</i> the native WGet libraries are statically loaded and the JNI used to download directly. */
72 static final private boolean simple = true;
73 /** Load the WGet native library. */
74 static {
75 if(!simple) {
76 System.load(System.getProperty("user.dir") + File.separator + "libgatherer.so");
77 }
78 }
79
80 /** Constructor. Nothing special. */
81 public WGet() {
82 job = null;
83 job_queue = new Vector();
84 filler_pane = new JPanel();
85 list_pane = new JPanel();
86 list_pane.setLayout(new BoxLayout(list_pane, BoxLayout.Y_AXIS));
87 //list_pane.setLayout(new GridLayout(height_count,1));
88 list_scroll = new JScrollPane(list_pane);
89 //list_scroll.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
90 }
91
92 /** Called by the WGet native code to inform us of a new download starting within the given job.
93 * @param url The url that is being downloaded, as a <strong>String</strong>.
94 * @see org.greenstone.gatherer.collection.DownloadJob
95 */
96 public synchronized void addDownload(String url) {
97 job.addDownload(url);
98 }
99
100 /* Used to advise the DownloadJob of a newly parsed link. Its up to DownloadJob to decide if it already knows about this url, and if not to update its progress bar.
101 * @param url The url in question as a <strong>String</strong>.
102 * @param type <i>true</i> if this is an internal link, <i>false</i> for and external one.
103 * @return A <i>boolean</i> with a value of <i>true</i> indicating if the url was added, <i>false</i> otherwise.
104 * @see org.greenstone.gatherer.collection.DownloadJob
105 */
106 public synchronized boolean addLink(String url, int type) {
107 return job.addLink(url, type);
108 }
109
110 /* Whenever files are moved into or out of the collection we need to
111 * run convertLinks on the files remaining. This ensures that we have
112 * the most efficient balance between local and absolute links.
113 * @param records A <strong>Vector</strong> containing information about the files in
114 * the current collection. From these url and file information is
115 * harvested.
116 */
117 /* private void convertLinks() {
118 Vector args = new Vector();
119 Vector files = new Vector();
120 Vector urls = new Vector();
121 // Args - there ain't many
122 args.add("-d");
123 args.add("-o");
124 args.add("debug.txt");
125 args.add("-P");
126 args.add("/tmp/"); */
127
128 // Downloaded urls (two entries for each record). The pattern here is:
129 // local file
130 // original url of file
131 /*
132 for(Enumeration e = records.elements(); e.hasMoreElements();) {
133 urls.add(((GURL)e).getLocalName());
134 urls.add(((GURL)e).getURL());
135 }
136 */
137 //urls.add("/tmp//www.cs.waikato.ac.nz/index.html");
138 //urls.add("http://www.cs.waikato.ac.nz/index.html");
139 //urls.add("/tmp//www.cs.waikato.ac.nz/events.html");
140 //urls.add("http://www.cs.waikato.ac.nz/events.html");
141 //urls.add("/tmp//www.cs.waikato.ac.nz/history.html");
142 //urls.add("http://www.cs.waikato.ac.nz/history.html");
143 //urls.add("/tmp//www.cs.waikato.ac.nz/icons/cs_title_logo.gif");
144 //urls.add("http://www.cs.waikato.ac.nz/icons/cs_title_logo.gif");
145 //urls.add("/tmp//www.cs.waikato.ac.nz/icons/scms_title_logo.gif");
146 //urls.add("http://www.cs.waikato.ac.nz/icons/scms_title_logo.gif");
147
148 // Downloaded files (html only). We race back through our records
149 // looking for html/text content ones.
150 /*
151 for(Enumeration e = records.elements(); e.hasMoreElements(); ) {
152 GURL record = (GURL)e;
153 if(record.isHTML()) {
154 files.add(record.getLocalName());
155 }
156 }
157 */
158 //files.add("/tmp//www.cs.waikato.ac.nz/index.html");
159
160 //wren(args.size(), args.toArray(), urls.size(), urls.toArray(),
161 // files.size(), files.toArray());
162 /* } */
163
164 /** This method is called to delete a certain job from the queue.
165 * This job may be pending, complete or even in progress. However
166 * if it is currently downloading then the delete method must
167 * wait until the native job has cleanly exited before removing
168 * the job.
169 * @param delete_me The <strong>DownloadJob</strong> that is to be deleted.
170 */
171 public void deleteDownloadJob(DownloadJob delete_me) {
172 if (delete_me == job) {
173 // While this seems wasteful its only for the briefest moment.
174 while(busy) {
175 }
176 job = null;
177 }
178 if (delete_me.hasSignalledStop()) {
179 list_pane.remove(delete_me.getProgressBar());
180 job_queue.remove(delete_me);
181 // Unfortunately removing a task gets a bit more complicated as we have to resize the filler
182 list_pane.remove(filler_pane);
183 if(job_queue.size() > 0) {
184 Dimension progress_bar_size = delete_me.getProgressBar().getPreferredSize();
185 Dimension list_pane_size = list_pane.getSize();
186 int height = list_pane_size.height - (job_queue.size() * progress_bar_size.height);
187 progress_bar_size = null;
188 if(height > 0) {
189 filler_pane.setPreferredSize(new Dimension(list_pane_size.width, height));
190 list_pane.add(filler_pane);
191 }
192 list_pane_size = null;
193 }
194 list_pane.updateUI();
195 }
196 else {
197 DebugStream.println("Somehow we're trying to delete a job that is still running.");
198 }
199 }
200
201 /** Called by the WGet native code when the current download,
202 * for the indicated job, is completed. In turn all download
203 * listeners are informed.
204 * @see org.greenstone.gatherer.collection.DownloadJob
205 */
206 public synchronized void downloadComplete() {
207 job.downloadComplete();
208 }
209
210 /** Called by the WGet native code when the requested download returns
211 * a status code other than 200-399 for the specified download job.
212 * for.
213 * @see org.greenstone.gatherer.collection.DownloadJob
214 */
215 public synchronized void downloadFailed() {
216 // Add the failed download as a new job if the user so requests.
217 job.downloadFailed();
218 }
219
220 /** Called by the WGet native code when some non-fatal error has caused
221 * a download to fail. An example of a warning would be if a file can't
222 * be downloaded as doing so would clobber an existing file and the -nc
223 * flag is set.
224 * @see org.greenstone.gatherer.collection.DownloadJob
225 */
226 public synchronized void downloadWarning() {
227 job.downloadWarning();
228 }
229
230 /** Used by other graphic functions to get a reference to the
231 * scroll pane containing the current list of jobs.
232 */
233 public JScrollPane getDownloadJobList() {
234 return list_scroll;
235 }
236
237 /** Returns the current state of the stop flag for the job indicated.
238 * @return A boolean representing whether the user has requested to
239 * stop.
240 * @see org.greenstone.gatherer.collection.DownloadJob
241 */
242 public synchronized boolean hasSignalledStop() {
243 return job.hasSignalledStop();
244 }
245
246 /** Creates a new mirroring job on the queue given the target url and the destination (private, public). All other details are harvested from the config file, but these two must be captured from the GUI's current state.
247 * @param no_parents
248 * @param other_hosts
249 * @param page_requisites
250 * @param url a URL which points to the root url for the mirroring
251 * @param depth
252 * @param destination the destination file as a String
253 * @see org.greenstone.gatherer.Configuration
254 * @see org.greenstone.gatherer.Gatherer
255 * @see org.greenstone.gatherer.collection.DownloadJob
256 * @see org.greenstone.gatherer.gui.DownloadProgressBar
257 * @see org.greenstone.gatherer.util.GURL
258 */
259 public void newDownloadJob(boolean no_parents, boolean other_hosts, boolean page_requisites, URL url, int depth, String destination) {
260 // Create the job and fill in the details from gatherer.config.
261 DebugStream.println("About to create a new job");
262
263 DownloadJob new_job = new DownloadJob(Configuration.get("mirroring.debug", false), no_parents, other_hosts, page_requisites, Configuration.get("mirroring.quiet", false), url, depth, destination, Configuration.proxy_pass, Configuration.proxy_user, this, simple);
264
265 // Tell it to run as soon as possible
266 new_job.setState(DownloadJob.RUNNING);
267
268 // Add to job_queue job list.
269 job_queue.add(new_job);
270 // Now add it to the visual component, job list.
271 list_pane.remove(filler_pane);
272 Dimension progress_bar_size = new_job.getProgressBar().getPreferredSize();
273 Dimension list_pane_size = list_pane.getSize();
274 int height = list_pane_size.height - (job_queue.size() * progress_bar_size.height);
275 progress_bar_size = null;
276 list_pane.add(new_job.getProgressBar());
277 if(height > 0) {
278 filler_pane.setPreferredSize(new Dimension(list_pane_size.width, height));
279 list_pane.add(filler_pane);
280 }
281 list_pane_size = null;
282 //list_pane.setAlignmentX(Component.LEFT_ALIGNMENT);
283 list_pane.updateUI();
284 new_job = null;
285 synchronized(this) {
286 notify(); // Just incase its sleeping.
287 }
288 }
289
290 /** Called by the WGet native code to signal the current progress of
291 * downloading for the specified job.
292 * @param current A long representing the number of bytes that have
293 * been downloaded since the last update.
294 * @param expected A long representing the total number of bytes
295 * expected for this download.
296 * @see org.greenstone.gatherer.collection.DownloadJob
297 */
298 public synchronized void updateProgress(long current, long expected) {
299 job.updateProgress(current, expected);
300 }
301
302 /* There may be times when the download thread is sleeping, but the
303 * user has indicated that a previously paused job should now begin
304 * again. The flag within the job will change, so we tell the thread
305 * to start again.
306 */
307 public void resumeThread() {
308 synchronized(this) {
309 notify(); // Just incase its sleeping.
310 }
311 }
312
313 /* This begins the WGet thread, which simply iterates through the waiting
314 * jobs attempting each one. Successful downloads are removed from the
315 * waiting list.
316 * @see org.greenstone.gatherer.Gatherer
317 * @see org.greenstone.gatherer.collection.DownloadJob
318 */
319 public void run() {
320 while(true) {
321 // If there are jobs job_queue and we have more room.
322 if(job_queue.size() > 0) {
323 int index = 0;
324 while(job_queue.size() > 0 && index < job_queue.size()) {
325 // Get the first job that isn't stopped.
326 job = (DownloadJob) job_queue.get(index);
327 if(job.getState() == DownloadJob.RUNNING) {
328 DebugStream.println("DownloadJob " + job.toString() + " Begun.");
329 // A lock to prevent us deleting this job while its being
330 // run, unless you want things to go really wrong.
331 busy = true;
332 if(simple) {
333 job.callWGet();
334 }
335 else {
336 job.callWGetNative();
337 }
338 busy = false;
339 DebugStream.println("DownloadJob " + job.toString() + " complete.");
340 // And if the user has requested that complete jobs
341 // be removed, then remove it from the list.
342 //deleteDownloadJob(job);
343 job = null;
344 }
345 index++;
346 }
347 }
348 // In order to save processor time, I'll suspend the thread
349 // if theres no advantage to it running. Actions such as
350 // new or complete jobs will resume the thread.
351 else {
352 try {
353 synchronized(this) {
354 DebugStream.println("WGet thread is waiting for DownloadJobs.");
355 wait();
356 }
357 } catch (InterruptedException e) {
358 // Time to get going again.
359 }
360 }
361 } // End While.
362 }
363
364 /* Link to the call to the WGet Native method for downloading.
365 * @param argc An int representing the number of elements in argv.
366 * @param argv An array of objects passed as arguments to wget.
367 * @param debug Whether the native code should show java-only debug
368 * messages.
369 */
370 public native int wget(int argc, Object argv[], boolean debug);
371
372 /* Link to the call to the WGet Native method for converting url links.
373 * @param argc An int representing the number of object elements in argv.
374 * @param argv An array of objects passed as arguments to wget.
375 * @param urlc An int representing the number of url string elements in urlv.
376 * @param urlv An array of strings representing urls.
377 * @param filec An int representing the number of file string elements in filev.
378 * @param filev An array of strings representing files.
379 */
380 public native void wren(int argc, Object argv[], int urlc, Object urlv[], int filec, Object filev[]);
381}
Note: See TracBrowser for help on using the repository browser.