source: trunk/gli/src/org/greenstone/gatherer/WGet.java@ 5785

Last change on this file since 5785 was 5785, checked in by mdewsnip, 21 years ago

Commented out about 60 unused functions.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.7 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer;
38
39import java.awt.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.*;
44import javax.swing.tree.*;
45import org.greenstone.gatherer.Gatherer;
46import org.greenstone.gatherer.collection.Job;
47
48/** This class provides access to the functionality of the WGet program, either by calling it via a shell script or by the JNI. It maintains a queue of pending jobs, and the component for showing these tasks to the user.
49 * @author John Thompson, Greenstone Digital Library, University of Waikato
50 * @version 2.3
51 */
52public class WGet
53 extends Thread {
54
55 /** <i>true</i> if there is a task currently being carried out, <i>false</i> otherwise. */
56 private boolean busy = false;
57 /** <i>true</i> if verbose debug messages should be displayed, <i>false</i> otherwise. */
58 private boolean debug = false;
59 /** <i>true</i> if successfully completed tasks should be automatically removed from the job queue. */
60 private boolean remove_complete_jobs = true;
61
62 private JPanel filler_pane = null;
63 /** The panel that the task list will be shown in. */
64 private JPanel list_pane;
65 /** The job currently underway. */
66 private Job job;
67 /** A scroll pane which will be used to display the list of pending tasks. */
68 private JScrollPane list_scroll;
69 /** A queue of download tasks. */
70 private Vector job_queue;
71 /** A static flag used to switch between simple and advanced modes. If <i>true</i> the Process object is used to externally call the Wget program. If <i>false</i> the native WGet libraries are statically loaded and the JNI used to download directly. */
72 static final private boolean simple = true;
73 /** Load the WGet native library. */
74 static {
75 if(!simple) {
76 System.load(System.getProperty("user.dir") + File.separator + "libgatherer.so");
77 }
78 }
79
80 /** Constructor. Nothing special. */
81 public WGet() {
82 job = null;
83 job_queue = new Vector();
84 filler_pane = new JPanel();
85 list_pane = new JPanel();
86 list_pane.setLayout(new BoxLayout(list_pane, BoxLayout.Y_AXIS));
87 //list_pane.setLayout(new GridLayout(height_count,1));
88 list_scroll = new JScrollPane(list_pane);
89 //list_scroll.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
90 }
91
92 /** Called by the WGet native code to inform us of a new download starting within the given job.
93 * @param url The url that is being downloaded, as a <strong>String</strong>.
94 * @see org.greenstone.gatherer.collection.Job
95 */
96 public synchronized void addDownload(String url) {
97 job.addDownload(url);
98 }
99
100 /* Used to advise the Job of a newly parsed link. Its up to Job to decide if it already knows about this url, and if not to update its progress bar.
101 * @param url The url in question as a <strong>String</strong>.
102 * @param type <i>true</i> if this is an internal link, <i>false</i> for and external one.
103 * @return A <i>boolean</i> with a value of <i>true</i> indicating if the url was added, <i>false</i> otherwise.
104 * @see org.greenstone.gatherer.collection.Job
105 */
106 public synchronized boolean addLink(String url, int type) {
107 return job.addLink(url, type);
108 }
109
110 /* Whenever files are moved into or out of the collection we need to
111 * run convertLinks on the files remaining. This ensures that we have
112 * the most efficient balance between local and absolute links.
113 * @param records A <strong>Vector</strong> containing information about the files in
114 * the current collection. From these url and file information is
115 * harvested.
116 */
117 /* private void convertLinks() {
118 Vector args = new Vector();
119 Vector files = new Vector();
120 Vector urls = new Vector();
121 // Args - there ain't many
122 args.add("-d");
123 args.add("-o");
124 args.add("debug.txt");
125 args.add("-P");
126 args.add("/tmp/"); */
127
128 // Downloaded urls (two entries for each record). The pattern here is:
129 // local file
130 // original url of file
131 /*
132 for(Enumeration e = records.elements(); e.hasMoreElements();) {
133 urls.add(((GURL)e).getLocalName());
134 urls.add(((GURL)e).getURL());
135 }
136 */
137 //urls.add("/tmp//www.cs.waikato.ac.nz/index.html");
138 //urls.add("http://www.cs.waikato.ac.nz/index.html");
139 //urls.add("/tmp//www.cs.waikato.ac.nz/events.html");
140 //urls.add("http://www.cs.waikato.ac.nz/events.html");
141 //urls.add("/tmp//www.cs.waikato.ac.nz/history.html");
142 //urls.add("http://www.cs.waikato.ac.nz/history.html");
143 //urls.add("/tmp//www.cs.waikato.ac.nz/icons/cs_title_logo.gif");
144 //urls.add("http://www.cs.waikato.ac.nz/icons/cs_title_logo.gif");
145 //urls.add("/tmp//www.cs.waikato.ac.nz/icons/scms_title_logo.gif");
146 //urls.add("http://www.cs.waikato.ac.nz/icons/scms_title_logo.gif");
147
148 // Downloaded files (html only). We race back through our records
149 // looking for html/text content ones.
150 /*
151 for(Enumeration e = records.elements(); e.hasMoreElements(); ) {
152 GURL record = (GURL)e;
153 if(record.isHTML()) {
154 files.add(record.getLocalName());
155 }
156 }
157 */
158 //files.add("/tmp//www.cs.waikato.ac.nz/index.html");
159
160 //wren(args.size(), args.toArray(), urls.size(), urls.toArray(),
161 // files.size(), files.toArray());
162 /* } */
163
164 /** This method is called to delete a certain job from the queue.
165 * This job may be pending, complete or even in progress. However
166 * if it is currently downloading then the delete method must
167 * wait until the native job has cleanly exited before removing
168 * the job.
169 * @param delete_me The <strong>Job</strong> that is to be deleted.
170 */
171 public void deleteJob(Job delete_me) {
172 if (delete_me == job) {
173 // While this seems wasteful its only for the briefest moment.
174 while(busy) {
175 }
176 job = null;
177 }
178 if (delete_me.hasSignalledStop()) {
179 list_pane.remove(delete_me.getProgressBar());
180 job_queue.remove(delete_me);
181 // Unfortunately removing a task gets a bit more complicated as we have to resize the filler
182 list_pane.remove(filler_pane);
183 if(job_queue.size() > 0) {
184 Dimension progress_bar_size = delete_me.getProgressBar().getPreferredSize();
185 Dimension list_pane_size = list_pane.getSize();
186 int height = list_pane_size.height - (job_queue.size() * progress_bar_size.height);
187 progress_bar_size = null;
188 if(height > 0) {
189 filler_pane.setPreferredSize(new Dimension(list_pane_size.width, height));
190 list_pane.add(filler_pane);
191 }
192 list_pane_size = null;
193 }
194 list_pane.updateUI();
195 }
196 else {
197 Gatherer.println("Somehow we're trying to delete a job that is still running.");
198 }
199 }
200
201 /** Called by the WGet native code when the current download,
202 * for the indicated job, is completed. In turn all download
203 * listeners are informed.
204 * @see org.greenstone.gatherer.collection.Job
205 */
206 public synchronized void downloadComplete() {
207 job.downloadComplete();
208 }
209
210 /** Called by the WGet native code when the requested download returns
211 * a status code other than 200-399 for the specified download job.
212 * for.
213 * @see org.greenstone.gatherer.collection.Job
214 */
215 public synchronized void downloadFailed() {
216 // Add the failed download as a new job if the user so requests.
217 job.downloadFailed();
218 }
219
220 /** Called by the WGet native code when some non-fatal error has caused
221 * a download to fail. An example of a warning would be if a file can't
222 * be downloaded as doing so would clobber an existing file and the -nc
223 * flag is set.
224 * @see org.greenstone.gatherer.collection.Job
225 */
226 public synchronized void downloadWarning() {
227 job.downloadWarning();
228 }
229
230 /** Used by other graphic functions to get a reference to the
231 * scroll pane containing the current list of jobs.
232 */
233 public JScrollPane getJobList() {
234 return list_scroll;
235 }
236
237 /** Returns the current state of the stop flag for the job indicated.
238 * @return A boolean representing whether the user has requested to
239 * stop.
240 * @see org.greenstone.gatherer.collection.Job
241 */
242 public synchronized boolean hasSignalledStop() {
243 return job.hasSignalledStop();
244 }
245
246 /** Creates a new mirroring job on the queue given the target url and the destination (private, public). All other details are harvested from the config file, but these two must be captured from the GUI's current state.
247 * @param url a URL which points to the root url for the mirroring
248 * @param model the GTreeModel that any new records should be added to
249 * @param destination the destination file as a String
250 * @see org.greenstone.gatherer.Configuration
251 * @see org.greenstone.gatherer.Gatherer
252 * @see org.greenstone.gatherer.collection.Job
253 * @see org.greenstone.gatherer.gui.GProgressBar
254 * @see org.greenstone.gatherer.util.GURL
255 */
256 public void newJob(TreeModel model, boolean overwrite, boolean no_parents, boolean other_hosts, boolean page_requisites, URL url, int depth, String destination) {
257 // Create the job and fill in the details from gatherer.config.
258 Gatherer.println("About to create a new job");
259 // If it was decided not to download page requisites, then create the destination by basing it on the given destination, but appending the url host. If page requisites is used then WGet will do this for us
260 if(!page_requisites) {
261 destination = destination + url.getHost();
262 }
263 Job new_job = new Job(model, overwrite, Gatherer.config.get("mirroring.debug", false), no_parents, other_hosts, page_requisites, Gatherer.config.get("mirroring.quiet", false), url, depth, destination, Gatherer.config.proxy_pass, Gatherer.config.proxy_user, this, simple);
264 // Add to job_queue job list.
265 job_queue.add(new_job);
266 // Now add it to the visual component, job list.
267 list_pane.remove(filler_pane);
268 Dimension progress_bar_size = new_job.getProgressBar().getPreferredSize();
269 Dimension list_pane_size = list_pane.getSize();
270 int height = list_pane_size.height - (job_queue.size() * progress_bar_size.height);
271 progress_bar_size = null;
272 list_pane.add(new_job.getProgressBar());
273 if(height > 0) {
274 filler_pane.setPreferredSize(new Dimension(list_pane_size.width, height));
275 list_pane.add(filler_pane);
276 }
277 list_pane_size = null;
278 //list_pane.setAlignmentX(Component.LEFT_ALIGNMENT);
279 list_pane.updateUI();
280 new_job = null;
281 synchronized(this) {
282 notify(); // Just incase its sleeping.
283 }
284 }
285
286 /** Called by the WGet native code to signal the current progress of
287 * downloading for the specified job.
288 * @param current A long representing the number of bytes that have
289 * been downloaded since the last update.
290 * @param expected A long representing the total number of bytes
291 * expected for this download.
292 * @see org.greenstone.gatherer.collection.Job
293 */
294 public synchronized void updateProgress(long current, long expected) {
295 job.updateProgress(current, expected);
296 }
297
298 /* There may be times when the download thread is sleeping, but the
299 * user has indicated that a previously paused job should now begin
300 * again. The flag within the job will change, so we tell the thread
301 * to start again.
302 */
303 public void resumeThread() {
304 synchronized(this) {
305 notify(); // Just incase its sleeping.
306 }
307 }
308
309 /* This begins the WGet thread, which simply iterates through the waiting
310 * jobs attempting each one. Successful downloads are removed from the
311 * waiting list.
312 * @see org.greenstone.gatherer.Gatherer
313 * @see org.greenstone.gatherer.collection.Job
314 */
315 public void run() {
316 while(true) {
317 // If there are jobs job_queue and we have more room.
318 if(job_queue.size() > 0) {
319 int index = 0;
320 while(index < job_queue.size()) {
321 // Get the first job that isn't stopped.
322 job = (Job) job_queue.get(index);
323 if(job.getState() == Job.RUNNING) {
324 Gatherer.println("Job " + job.toString() + " Begun.");
325 // A lock to prevent us deleting this job while its being
326 // run, unless you want things to go really wrong.
327 busy = true;
328 if(simple) {
329 job.callWGet();
330 }
331 else {
332 job.callWGetNative();
333 }
334 busy = false;
335 Gatherer.println("Job " + job.toString() + " complete.");
336 // And if the user has requested that complete jobs
337 // be removed, then remove it from the list.
338 deleteJob(job);
339 job = null;
340 }
341 index++;
342 }
343 }
344 // In order to save processor time, I'll suspend the thread
345 // if theres no advantage to it running. Actions such as
346 // new or complete jobs will resume the thread.
347 else {
348 try {
349 synchronized(this) {
350 Gatherer.println("WGet thread is waiting for Jobs.");
351 wait();
352 }
353 } catch (InterruptedException e) {
354 // Time to get going again.
355 }
356 }
357 } // End While.
358 }
359
360 /* Link to the call to the WGet Native method for downloading.
361 * @param argc An int representing the number of elements in argv.
362 * @param argv An array of objects passed as arguments to wget.
363 * @param debug Whether the native code should show java-only debug
364 * messages.
365 */
366 public native int wget(int argc, Object argv[], boolean debug);
367
368 /* Link to the call to the WGet Native method for converting url links.
369 * @param argc An int representing the number of object elements in argv.
370 * @param argv An array of objects passed as arguments to wget.
371 * @param urlc An int representing the number of url string elements in urlv.
372 * @param urlv An array of strings representing urls.
373 * @param filec An int representing the number of file string elements in filev.
374 * @param filev An array of strings representing files.
375 */
376 public native void wren(int argc, Object argv[], int urlc, Object urlv[], int filec, Object filev[]);
377}
Note: See TracBrowser for help on using the repository browser.