source: trunk/gli/src/org/greenstone/gatherer/WGet.java@ 4368

Last change on this file since 4368 was 4363, checked in by kjdon, 21 years ago

re-tabbed the code for java

  • Property svn:keywords set to Author Date Id Revision
File size: 13.4 KB
Line 
1package org.greenstone.gatherer;
2/**
3 *#########################################################################
4 *
5 * A component of the Gatherer application, part of the Greenstone digital
6 * library suite from the New Zealand Digital Library Project at the
7 * University of Waikato, New Zealand.
8 *
9 * <BR><BR>
10 *
11 * Author: John Thompson, Greenstone Digital Library, University of Waikato
12 *
13 * <BR><BR>
14 *
15 * Copyright (C) 1999 New Zealand Digital Library Project
16 *
17 * <BR><BR>
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * <BR><BR>
25 *
26 * This program is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * <BR><BR>
32 *
33 * You should have received a copy of the GNU General Public License
34 * along with this program; if not, write to the Free Software
35 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
36 *########################################################################
37 */
38import java.awt.*;
39import java.io.*;
40import java.util.*;
41import javax.swing.*;
42import javax.swing.tree.*;
43import org.greenstone.gatherer.Gatherer;
44import org.greenstone.gatherer.collection.Job;
45import org.greenstone.gatherer.util.GURL;
46/** This class provides access to the functionality of the WGet program, either by calling it via a shell script or by the JNI. It maintains a queue of pending jobs, and the component for showing these tasks to the user.
47 * @author John Thompson, Greenstone Digital Library, University of Waikato
48 * @version 2.3
49 */
50public class WGet
51 extends Thread {
52 /** <i>true</i> if there is a task currently being carried out, <i>false</i> otherwise. */
53 private boolean busy = false;
54 /** <i>true</i> if verbose debug messages should be displayed, <i>false</i> otherwise. */
55 private boolean debug = false;
56 /** <i>true</i> if successfully completed tasks should be automatically removed from the job queue. */
57 private boolean remove_complete_jobs = true;
58 /** The panel that the task list will be shown it. */
59 private JPanel list_pane;
60 /** The job currently underway. */
61 private Job job;
62 /** A scroll pane which will be used to display the list of pending tasks. */
63 private JScrollPane list_scroll;
64 /** A queue of download tasks. */
65 private Vector job_queue;
66 /** A static flag used to switch between simple and advanced modes. If <i>true</i> the Process object is used to externally call the Wget program. If <i>false</i> the native WGet libraries are statically loaded and the JNI used to download directly. */
67 static final private boolean simple = true;
68 /** Load the WGet native library. */
69 static {
70 if(!simple) {
71 System.load(System.getProperty("user.dir") + File.separator + "libgatherer.so");
72 }
73 }
74
75 /** Constructor. Nothing special. */
76 public WGet() {
77 job = null;
78 job_queue = new Vector();
79 list_pane = new JPanel();
80 list_pane.setLayout(new BoxLayout(list_pane, BoxLayout.Y_AXIS));
81 //list_pane.setLayout(new GridLayout(1,1));
82 list_scroll = new JScrollPane(list_pane);
83 //list_scroll.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
84 }
85
86 /** Called by the WGet native code to inform us of a new download starting within the given job.
87 * @param url The url that is being downloaded, as a <strong>String</strong>.
88 * @see org.greenstone.gatherer.collection.Job
89 */
90 public synchronized void addDownload(String url) {
91 job.addDownload(url);
92 }
93
94 /* Used to advise the Job of a newly parsed link. Its up to Job to decide if it already knows about this url, and if not to update its progress bar.
95 * @param url The url in question as a <strong>String</strong>.
96 * @param type <i>true</i> if this is an internal link, <i>false</i> for and external one.
97 * @return A <i>boolean</i> with a value of <i>true</i> indicating if the url was added, <i>false</i> otherwise.
98 * @see org.greenstone.gatherer.collection.Job
99 */
100 public synchronized boolean addLink(String url, int type) {
101 return job.addLink(url, type);
102 }
103
104 /* Whenever files are moved into or out of the collection we need to
105 * run convertLinks on the files remaining. This ensures that we have
106 * the most efficient balance between local and absolute links.
107 * @param records A <strong>Vector</strong> containing information about the files in
108 * the current collection. From these url and file information is
109 * harvested.
110 */
111 // public void convertLinks(Vector records) {
112 public void convertLinks() {
113 Vector args = new Vector();
114 Vector files = new Vector();
115 Vector urls = new Vector();
116 // Args - there ain't many
117 args.add("-d");
118 args.add("-o");
119 args.add("debug.txt");
120 args.add("-P");
121 args.add("/tmp/");
122
123 // Downloaded urls (two entries for each record). The pattern here is:
124 // local file
125 // original url of file
126 /*
127 for(Enumeration e = records.elements(); e.hasMoreElements();) {
128 urls.add(((GURL)e).getLocalName());
129 urls.add(((GURL)e).getURL());
130 }
131 */
132 //urls.add("/tmp//www.cs.waikato.ac.nz/index.html");
133 //urls.add("http://www.cs.waikato.ac.nz/index.html");
134 //urls.add("/tmp//www.cs.waikato.ac.nz/events.html");
135 //urls.add("http://www.cs.waikato.ac.nz/events.html");
136 //urls.add("/tmp//www.cs.waikato.ac.nz/history.html");
137 //urls.add("http://www.cs.waikato.ac.nz/history.html");
138 //urls.add("/tmp//www.cs.waikato.ac.nz/icons/cs_title_logo.gif");
139 //urls.add("http://www.cs.waikato.ac.nz/icons/cs_title_logo.gif");
140 //urls.add("/tmp//www.cs.waikato.ac.nz/icons/scms_title_logo.gif");
141 //urls.add("http://www.cs.waikato.ac.nz/icons/scms_title_logo.gif");
142
143 // Downloaded files (html only). We race back through our records
144 // looking for html/text content ones.
145 /*
146 for(Enumeration e = records.elements(); e.hasMoreElements(); ) {
147 GURL record = (GURL)e;
148 if(record.isHTML()) {
149 files.add(record.getLocalName());
150 }
151 }
152 */
153 //files.add("/tmp//www.cs.waikato.ac.nz/index.html");
154
155 //wren(args.size(), args.toArray(), urls.size(), urls.toArray(),
156 // files.size(), files.toArray());
157 }
158
159 /** This method is called to delete a certain job from the queue.
160 * This job may be pending, complete or even in progress. However
161 * if it is currently downloading then the delete method must
162 * wait until the native job has cleanly exited before removing
163 * the job.
164 * @param delete_me The <strong>Job</strong> that is to be deleted.
165 */
166 public void deleteJob(Job delete_me) {
167 if(delete_me == job) {
168 // While this seems wasteful its only for the briefest moment.
169 while(busy) {
170 }
171 job = null;
172 }
173 if(delete_me.hasSignalledStop()) {
174 list_pane.remove(delete_me.getProgressBar());
175 list_pane.updateUI();
176 job_queue.remove(delete_me);
177 }
178 else {
179 Gatherer.println("Somehow we're trying to delete a job that is still running.");
180 }
181 }
182
183 /** Called by the WGet native code when the current download,
184 * for the indicated job, is completed. In turn all download
185 * listeners are informed.
186 * @see org.greenstone.gatherer.collection.Job
187 */
188 public synchronized void downloadComplete() {
189 job.downloadComplete();
190 }
191
192 /** Called by the WGet native code when the requested download returns
193 * a status code other than 200-399 for the specified download job.
194 * for.
195 * @see org.greenstone.gatherer.collection.Job
196 */
197 public synchronized void downloadFailed() {
198 // Add the failed download as a new job if the user so requests.
199 job.downloadFailed();
200 }
201
202 /** Called by the WGet native code when some non-fatal error has caused
203 * a download to fail. An example of a warning would be if a file can't
204 * be downloaded as doing so would clobber an existing file and the -nc
205 * flag is set.
206 * @see org.greenstone.gatherer.collection.Job
207 */
208 public synchronized void downloadWarning() {
209 job.downloadWarning();
210 }
211
212 /** Used by other graphic functions to get a reference to the
213 * scroll pane containing the current list of jobs.
214 */
215 public JScrollPane getJobList() {
216 return list_scroll;
217 }
218
219 /** Returns the current state of the stop flag for the job indicated.
220 * @return A boolean representing whether the user has requested to
221 * stop.
222 * @see org.greenstone.gatherer.collection.Job
223 */
224 public synchronized boolean hasSignalledStop() {
225 return job.hasSignalledStop();
226 }
227
228 /** Creates a new mirroring job on the queue given the target url and the destination (private, public). All other details are harvested from the config file, but these two must be captured from the GUI's current state.
229 * @param url A <strong>GURL</strong> which points to the root url for the mirroring.
230 * @param model The <strong>GTreeModel</strong> that any new records should be added to.
231 * @param destination The destination file as a <strong>String</strong>.
232 * @see org.greenstone.gatherer.Configuration
233 * @see org.greenstone.gatherer.Gatherer
234 * @see org.greenstone.gatherer.collection.Job
235 * @see org.greenstone.gatherer.gui.GProgressBar
236 * @see org.greenstone.gatherer.util.GURL
237 */
238 public void newJob(GURL url, TreeModel model, String destination) {
239 if(url.valid()) {
240 // Create the job and fill in the details from gatherer.config.
241 Gatherer.println("About to create a new job");
242 Job new_job = new Job(model, Gatherer.config.get("mirroring.overwrite", false), Gatherer.config.get("mirroring.debug", false), Gatherer.config.get("mirroring.no_parents", false), Gatherer.config.get("mirroring.other_hosts", false), Gatherer.config.get("mirroring.page_requisites", false), Gatherer.config.get("mirroring.quiet", false), url, Gatherer.config.getInt("mirroring.depth", false), destination, Gatherer.config.proxy_pass, Gatherer.config.proxy_user, this, simple);
243 // Add to job_queue job list.
244 job_queue.add(new_job);
245 // Now add it to the visual component, job list.
246 list_pane.add(new_job.getProgressBar());
247 //list_pane.setAlignmentX(Component.LEFT_ALIGNMENT);
248 list_pane.updateUI();
249 new_job = null;
250 synchronized(this) {
251 notify(); // Just incase its sleeping.
252 }
253 }
254 }
255
256 /** Called by the WGet native code to signal the current progress of
257 * downloading for the specified job.
258 * @param current A long representing the number of bytes that have
259 * been downloaded since the last update.
260 * @param expected A long representing the total number of bytes
261 * expected for this download.
262 * @see org.greenstone.gatherer.collection.Job
263 */
264 public synchronized void updateProgress(long current, long expected) {
265 job.updateProgress(current, expected);
266 }
267
268 /* There may be times when the download thread is sleeping, but the
269 * user has indicated that a previously paused job should now begin
270 * again. The flag within the job will change, so we tell the thread
271 * to start again.
272 */
273 public void resumeThread() {
274 synchronized(this) {
275 notify(); // Just incase its sleeping.
276 }
277 }
278
279 /* This begins the WGet thread, which simply iterates through the waiting
280 * jobs attempting each one. Successful downloads are removed from the
281 * waiting list.
282 * @see org.greenstone.gatherer.Gatherer
283 * @see org.greenstone.gatherer.collection.Job
284 */
285 public void run() {
286 while(true) {
287 // If there are jobs job_queue and we have more room.
288 if(job_queue.size() > 0) {
289 // Get the first job that isn't stopped.
290 for(Enumeration e = job_queue.elements(); e.hasMoreElements();) {
291 job = (Job) e.nextElement();
292 if(job.getState() == Job.RUNNING) {
293 Gatherer.println("Job " + job.toString() + " Begun.");
294 // A lock to prevent us deleting this job while its being
295 // run, unless you want things to go really wrong.
296 busy = true;
297 if(simple) {
298 job.callWGet();
299 }
300 else {
301 job.callWGetNative();
302 }
303 busy = false;
304 Gatherer.println("Job " + job.toString() + " complete.");
305 // And if the user has requested that complete jobs
306 // be removed, then remove it from the list.
307 if(remove_complete_jobs) {
308 deleteJob(job);
309 }
310 job = null;
311 }
312 }
313 }
314 // In order to save processor time, I'll suspend the thread
315 // if theres no advantage to it running. Actions such as
316 // new or complete jobs will resume the thread.
317 else {
318 try {
319 synchronized(this) {
320 Gatherer.println("WGet thread is waiting for Jobs.");
321 wait();
322 }
323 } catch (InterruptedException e) {
324 // Time to get going again.
325 }
326 }
327 } // End While.
328 }
329
330 /* Link to the call to the WGet Native method for downloading.
331 * @param argc An int representing the number of elements in argv.
332 * @param argv An array of objects passed as arguments to wget.
333 * @param debug Whether the native code should show java-only debug
334 * messages.
335 */
336 public native int wget(int argc, Object argv[], boolean debug);
337
338 /* Link to the call to the WGet Native method for converting url links.
339 * @param argc An int representing the number of object elements in argv.
340 * @param argv An array of objects passed as arguments to wget.
341 * @param urlc An int representing the number of url string elements
342 * in urlv.
343 * @param urlv An array of strings representing urls.
344 * @param filec An int representing the number of file string elements
345 * in filev.
346 * @param filev An array of strings representing files.
347 */
348 public native void wren(int argc, Object argv[], int urlc, Object urlv[], int filec, Object filev[]);
349}
350
351
352
353
354
355
356
Note: See TracBrowser for help on using the repository browser.