source: trunk/gli/src/org/greenstone/gatherer/WGet.java@ 9078

Last change on this file since 9078 was 8240, checked in by mdewsnip, 20 years ago

Removed unnecessary imports of org.greenstone.gatherer.Gatherer.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.6 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer;
38
39import java.awt.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.*;
44import javax.swing.tree.*;
45import org.greenstone.gatherer.collection.DownloadJob;
46
47/** This class provides access to the functionality of the WGet program, either by calling it via a shell script or by the JNI. It maintains a queue of pending jobs, and the component for showing these tasks to the user.
48 * @author John Thompson, Greenstone Digital Library, University of Waikato
49 * @version 2.3
50 */
51public class WGet
52 extends Thread {
53
54 /** <i>true</i> if there is a task currently being carried out, <i>false</i> otherwise. */
55 private boolean busy = false;
56 /** <i>true</i> if verbose debug messages should be displayed, <i>false</i> otherwise. */
57 private boolean debug = false;
58 /** <i>true</i> if successfully completed tasks should be automatically removed from the job queue. */
59 private boolean remove_complete_jobs = true;
60
61 private JPanel filler_pane = null;
62 /** The panel that the task list will be shown in. */
63 private JPanel list_pane;
64 /** The job currently underway. */
65 private DownloadJob job;
66 /** A scroll pane which will be used to display the list of pending tasks. */
67 private JScrollPane list_scroll;
68 /** A queue of download tasks. */
69 private Vector job_queue;
70 /** A static flag used to switch between simple and advanced modes. If <i>true</i> the Process object is used to externally call the Wget program. If <i>false</i> the native WGet libraries are statically loaded and the JNI used to download directly. */
71 static final private boolean simple = true;
72 /** Load the WGet native library. */
73 static {
74 if(!simple) {
75 System.load(System.getProperty("user.dir") + File.separator + "libgatherer.so");
76 }
77 }
78
79 /** Constructor. Nothing special. */
80 public WGet() {
81 job = null;
82 job_queue = new Vector();
83 filler_pane = new JPanel();
84 list_pane = new JPanel();
85 list_pane.setLayout(new BoxLayout(list_pane, BoxLayout.Y_AXIS));
86 //list_pane.setLayout(new GridLayout(height_count,1));
87 list_scroll = new JScrollPane(list_pane);
88 //list_scroll.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
89 }
90
91 /** Called by the WGet native code to inform us of a new download starting within the given job.
92 * @param url The url that is being downloaded, as a <strong>String</strong>.
93 * @see org.greenstone.gatherer.collection.DownloadJob
94 */
95 public synchronized void addDownload(String url) {
96 job.addDownload(url);
97 }
98
99 /* Used to advise the DownloadJob of a newly parsed link. Its up to DownloadJob to decide if it already knows about this url, and if not to update its progress bar.
100 * @param url The url in question as a <strong>String</strong>.
101 * @param type <i>true</i> if this is an internal link, <i>false</i> for and external one.
102 * @return A <i>boolean</i> with a value of <i>true</i> indicating if the url was added, <i>false</i> otherwise.
103 * @see org.greenstone.gatherer.collection.DownloadJob
104 */
105 public synchronized boolean addLink(String url, int type) {
106 return job.addLink(url, type);
107 }
108
109 /* Whenever files are moved into or out of the collection we need to
110 * run convertLinks on the files remaining. This ensures that we have
111 * the most efficient balance between local and absolute links.
112 * @param records A <strong>Vector</strong> containing information about the files in
113 * the current collection. From these url and file information is
114 * harvested.
115 */
116 /* private void convertLinks() {
117 Vector args = new Vector();
118 Vector files = new Vector();
119 Vector urls = new Vector();
120 // Args - there ain't many
121 args.add("-d");
122 args.add("-o");
123 args.add("debug.txt");
124 args.add("-P");
125 args.add("/tmp/"); */
126
127 // Downloaded urls (two entries for each record). The pattern here is:
128 // local file
129 // original url of file
130 /*
131 for(Enumeration e = records.elements(); e.hasMoreElements();) {
132 urls.add(((GURL)e).getLocalName());
133 urls.add(((GURL)e).getURL());
134 }
135 */
136 //urls.add("/tmp//www.cs.waikato.ac.nz/index.html");
137 //urls.add("http://www.cs.waikato.ac.nz/index.html");
138 //urls.add("/tmp//www.cs.waikato.ac.nz/events.html");
139 //urls.add("http://www.cs.waikato.ac.nz/events.html");
140 //urls.add("/tmp//www.cs.waikato.ac.nz/history.html");
141 //urls.add("http://www.cs.waikato.ac.nz/history.html");
142 //urls.add("/tmp//www.cs.waikato.ac.nz/icons/cs_title_logo.gif");
143 //urls.add("http://www.cs.waikato.ac.nz/icons/cs_title_logo.gif");
144 //urls.add("/tmp//www.cs.waikato.ac.nz/icons/scms_title_logo.gif");
145 //urls.add("http://www.cs.waikato.ac.nz/icons/scms_title_logo.gif");
146
147 // Downloaded files (html only). We race back through our records
148 // looking for html/text content ones.
149 /*
150 for(Enumeration e = records.elements(); e.hasMoreElements(); ) {
151 GURL record = (GURL)e;
152 if(record.isHTML()) {
153 files.add(record.getLocalName());
154 }
155 }
156 */
157 //files.add("/tmp//www.cs.waikato.ac.nz/index.html");
158
159 //wren(args.size(), args.toArray(), urls.size(), urls.toArray(),
160 // files.size(), files.toArray());
161 /* } */
162
163 /** This method is called to delete a certain job from the queue.
164 * This job may be pending, complete or even in progress. However
165 * if it is currently downloading then the delete method must
166 * wait until the native job has cleanly exited before removing
167 * the job.
168 * @param delete_me The <strong>DownloadJob</strong> that is to be deleted.
169 */
170 public void deleteDownloadJob(DownloadJob delete_me) {
171 if (delete_me == job) {
172 // While this seems wasteful its only for the briefest moment.
173 while(busy) {
174 }
175 job = null;
176 }
177 if (delete_me.hasSignalledStop()) {
178 list_pane.remove(delete_me.getProgressBar());
179 job_queue.remove(delete_me);
180 // Unfortunately removing a task gets a bit more complicated as we have to resize the filler
181 list_pane.remove(filler_pane);
182 if(job_queue.size() > 0) {
183 Dimension progress_bar_size = delete_me.getProgressBar().getPreferredSize();
184 Dimension list_pane_size = list_pane.getSize();
185 int height = list_pane_size.height - (job_queue.size() * progress_bar_size.height);
186 progress_bar_size = null;
187 if(height > 0) {
188 filler_pane.setPreferredSize(new Dimension(list_pane_size.width, height));
189 list_pane.add(filler_pane);
190 }
191 list_pane_size = null;
192 }
193 list_pane.updateUI();
194 }
195 else {
196 DebugStream.println("Somehow we're trying to delete a job that is still running.");
197 }
198 }
199
200 /** Called by the WGet native code when the current download,
201 * for the indicated job, is completed. In turn all download
202 * listeners are informed.
203 * @see org.greenstone.gatherer.collection.DownloadJob
204 */
205 public synchronized void downloadComplete() {
206 job.downloadComplete();
207 }
208
209 /** Called by the WGet native code when the requested download returns
210 * a status code other than 200-399 for the specified download job.
211 * for.
212 * @see org.greenstone.gatherer.collection.DownloadJob
213 */
214 public synchronized void downloadFailed() {
215 // Add the failed download as a new job if the user so requests.
216 job.downloadFailed();
217 }
218
219 /** Called by the WGet native code when some non-fatal error has caused
220 * a download to fail. An example of a warning would be if a file can't
221 * be downloaded as doing so would clobber an existing file and the -nc
222 * flag is set.
223 * @see org.greenstone.gatherer.collection.DownloadJob
224 */
225 public synchronized void downloadWarning() {
226 job.downloadWarning();
227 }
228
229 /** Used by other graphic functions to get a reference to the
230 * scroll pane containing the current list of jobs.
231 */
232 public JScrollPane getDownloadJobList() {
233 return list_scroll;
234 }
235
236 /** Returns the current state of the stop flag for the job indicated.
237 * @return A boolean representing whether the user has requested to
238 * stop.
239 * @see org.greenstone.gatherer.collection.DownloadJob
240 */
241 public synchronized boolean hasSignalledStop() {
242 return job.hasSignalledStop();
243 }
244
245 /** Creates a new mirroring job on the queue given the target url and the destination (private, public). All other details are harvested from the config file, but these two must be captured from the GUI's current state.
246 * @param no_parents
247 * @param other_hosts
248 * @param page_requisites
249 * @param url a URL which points to the root url for the mirroring
250 * @param depth
251 * @param destination the destination file as a String
252 * @see org.greenstone.gatherer.Configuration
253 * @see org.greenstone.gatherer.collection.DownloadJob
254 * @see org.greenstone.gatherer.gui.DownloadProgressBar
255 * @see org.greenstone.gatherer.util.GURL
256 */
257 public void newDownloadJob(boolean no_parents, boolean other_hosts, boolean page_requisites, URL url, int depth, String destination) {
258 // Create the job and fill in the details from gatherer.config.
259 DebugStream.println("About to create a new job");
260
261 DownloadJob new_job = new DownloadJob(Configuration.get("mirroring.debug", false), no_parents, other_hosts, page_requisites, Configuration.get("mirroring.quiet", false), url, depth, destination, Configuration.proxy_pass, Configuration.proxy_user, this, simple);
262
263 // Tell it to run as soon as possible
264 new_job.setState(DownloadJob.RUNNING);
265
266 // Add to job_queue job list.
267 job_queue.add(new_job);
268 // Now add it to the visual component, job list.
269 list_pane.remove(filler_pane);
270 Dimension progress_bar_size = new_job.getProgressBar().getPreferredSize();
271 Dimension list_pane_size = list_pane.getSize();
272 int height = list_pane_size.height - (job_queue.size() * progress_bar_size.height);
273 progress_bar_size = null;
274 list_pane.add(new_job.getProgressBar());
275 if(height > 0) {
276 filler_pane.setPreferredSize(new Dimension(list_pane_size.width, height));
277 list_pane.add(filler_pane);
278 }
279 list_pane_size = null;
280 //list_pane.setAlignmentX(Component.LEFT_ALIGNMENT);
281 list_pane.updateUI();
282 new_job = null;
283 synchronized(this) {
284 notify(); // Just incase its sleeping.
285 }
286 }
287
288 /** Called by the WGet native code to signal the current progress of
289 * downloading for the specified job.
290 * @param current A long representing the number of bytes that have
291 * been downloaded since the last update.
292 * @param expected A long representing the total number of bytes
293 * expected for this download.
294 * @see org.greenstone.gatherer.collection.DownloadJob
295 */
296 public synchronized void updateProgress(long current, long expected) {
297 job.updateProgress(current, expected);
298 }
299
300 /* There may be times when the download thread is sleeping, but the
301 * user has indicated that a previously paused job should now begin
302 * again. The flag within the job will change, so we tell the thread
303 * to start again.
304 */
305 public void resumeThread() {
306 synchronized(this) {
307 notify(); // Just incase its sleeping.
308 }
309 }
310
311 /* This begins the WGet thread, which simply iterates through the waiting
312 * jobs attempting each one. Successful downloads are removed from the
313 * waiting list.
314 * @see org.greenstone.gatherer.collection.DownloadJob
315 */
316 public void run() {
317 while(true) {
318 // If there are jobs job_queue and we have more room.
319 if(job_queue.size() > 0) {
320 int index = 0;
321 while(job_queue.size() > 0 && index < job_queue.size()) {
322 // Get the first job that isn't stopped.
323 job = (DownloadJob) job_queue.get(index);
324 if(job.getState() == DownloadJob.RUNNING) {
325 DebugStream.println("DownloadJob " + job.toString() + " Begun.");
326 // A lock to prevent us deleting this job while its being
327 // run, unless you want things to go really wrong.
328 busy = true;
329 if(simple) {
330 job.callWGet();
331 }
332 else {
333 job.callWGetNative();
334 }
335 busy = false;
336 DebugStream.println("DownloadJob " + job.toString() + " complete.");
337 // And if the user has requested that complete jobs
338 // be removed, then remove it from the list.
339 //deleteDownloadJob(job);
340 job = null;
341 }
342 index++;
343 }
344 }
345 // In order to save processor time, I'll suspend the thread
346 // if theres no advantage to it running. Actions such as
347 // new or complete jobs will resume the thread.
348 else {
349 try {
350 synchronized(this) {
351 DebugStream.println("WGet thread is waiting for DownloadJobs.");
352 wait();
353 }
354 } catch (InterruptedException e) {
355 // Time to get going again.
356 }
357 }
358 } // End While.
359 }
360
361 /* Link to the call to the WGet Native method for downloading.
362 * @param argc An int representing the number of elements in argv.
363 * @param argv An array of objects passed as arguments to wget.
364 * @param debug Whether the native code should show java-only debug
365 * messages.
366 */
367 public native int wget(int argc, Object argv[], boolean debug);
368
369 /* Link to the call to the WGet Native method for converting url links.
370 * @param argc An int representing the number of object elements in argv.
371 * @param argv An array of objects passed as arguments to wget.
372 * @param urlc An int representing the number of url string elements in urlv.
373 * @param urlv An array of strings representing urls.
374 * @param filec An int representing the number of file string elements in filev.
375 * @param filev An array of strings representing files.
376 */
377 public native void wren(int argc, Object argv[], int urlc, Object urlv[], int filec, Object filev[]);
378}
Note: See TracBrowser for help on using the repository browser.