source: trunk/gli/src/org/greenstone/gatherer/collection/Job.java@ 6622

Last change on this file since 6622 was 6622, checked in by jmt12, 20 years ago

More modifications to mirroring, including testing for a valid version of Wget (and complaining if it's missing or old) and rearranging buttons on the GProgressBar

  • Property svn:keywords set to Author Date Id Revision
File size: 18.0 KB
/**
 *#########################################################################
 *
 * A component of the Gatherer application, part of the Greenstone digital
 * library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * <BR><BR>
 *
 * Author: John Thompson, Greenstone Digital Library, University of Waikato
 *
 * <BR><BR>
 *
 * Copyright (C) 1999 New Zealand Digital Library Project
 *
 * <BR><BR>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * <BR><BR>
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * <BR><BR>
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *########################################################################
 */
package org.greenstone.gatherer.collection;

import java.awt.event.*;
import java.io.*;
import java.net.*;
import java.util.*;
import javax.swing.tree.*;
import org.greenstone.gatherer.Dictionary;
import org.greenstone.gatherer.Gatherer;
import org.greenstone.gatherer.WGet;
import org.greenstone.gatherer.file.FileNode;
import org.greenstone.gatherer.file.FileSystemModel;
import org.greenstone.gatherer.file.WorkspaceTreeModel;
import org.greenstone.gatherer.gui.GProgressBar;
import org.greenstone.gatherer.util.GURL;
import org.greenstone.gatherer.util.SynchronizedTreeModelTools;
import org.greenstone.gatherer.util.Utility;
/**
 * @author John Thompson, Greenstone Digital Library, University of Waikato
 * @version 2.0
 */
public class Job
    implements ActionListener {

    private boolean debug;
    private boolean higher_directories;
    private boolean no_parents;
    private boolean other_hosts;
    private boolean page_requisites;
    private boolean quiet;

    private GProgressBar progress;

    private GURL initial = null;
    private GURL url = null;

    // private TreeModel model;

    private int depth;
    private int previous_state;
    private int state;

    private String current_url;
    private String destination;
    private String proxy_pass;
    private String proxy_user;

    private Vector encountered_urls;
    private Vector failed_urls;

    private WGet mummy;

    public static int COMPLETE = 0;
    public static int PAUSED = 1;
    public static int RUNNING = 2;
    public static int STOPPED = 3;

    /**
     */
    public Job(boolean debug, boolean no_parents, boolean other_hosts, boolean page_requisites, boolean quiet, URL initial, int depth, String destination, String proxy_pass, String proxy_user, WGet mummy, boolean simple) {
        // this.model = model;

        this.debug = debug;
        this.no_parents = no_parents;
        this.other_hosts = other_hosts;
        this.page_requisites = page_requisites;
        this.quiet = quiet;
        this.initial = new GURL(initial);
        this.depth = depth;
        this.destination = destination;
        this.proxy_pass = proxy_pass;
        this.proxy_user = proxy_user;
        this.mummy = mummy;

        progress = new GProgressBar(this, initial.toString(), simple);

        encountered_urls = new Vector();
        failed_urls = new Vector();

        previous_state = STOPPED;
        state = STOPPED;
    }
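
    // Illustrative usage sketch only (the owning WGet instance, cache directory and
    // proxy values named here are assumptions for illustration, not taken from the
    // original source):
    //
    //   Job job = new Job(false, true, false, true, false,
    //                     new URL("http://www.example.com/"), 2,
    //                     cache_dir, proxy_pass, proxy_user, wget_owner, false);
    //   job.callWGet();              // spawn the external wget and parse its output
    //   int state = job.getState();  // Job.COMPLETE once the mirror has finished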

    /** Depending on which button on the progress bar was pushed,
     * this method will affect the state of the Job and perhaps make
     * calls to wget.class if necessary.
     * @param event The ActionEvent fired from within the GProgressBar
     * which we must respond to.
     */
    public void actionPerformed(ActionEvent event) {
        // The action button is used to alternately start or stop the
        // job. If the current state of the job is paused then this
        // restart is logically equivalent to a resume.
        if(event.getSource() == progress.action) {
            previous_state = state;
            if(state == RUNNING) {
                state = PAUSED;
            }
            else {
                state = RUNNING;
                mummy.resumeThread();
            }
        }
        else if (event.getSource() == progress.cancel) {
            state = STOPPED; // Should already be stopped.
            mummy.deleteJob(this);
        }
    }
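
    // Summary of the handling above: the action button pauses a RUNNING job and
    // (re)starts a PAUSED, STOPPED or COMPLETE one, resuming the owning WGet thread;
    // the cancel button leaves the job STOPPED and asks the owner to remove it.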

    /** Called by the WGet native code to inform us of a new download starting.
     * @param raw_url The url that is being downloaded, as a String.
     */
    public void addDownload(String raw_url) {
        if(!encountered_urls.contains(raw_url)) {
            encountered_urls.add(raw_url);
        }
        // Regardless, create a new GURL
        current_url = raw_url;
        url = new GURL(raw_url);
        progress.addDownload(raw_url);
    }

    /** Used to advise the Job of a newly parsed link. It's up to the Job
     * to decide if it already knows about this url, and if not to
     * update its progress bar.
     * @param raw_url The url in question as a String.
     * @param type Whether the link is an internal or external link.
     * @return A boolean indicating if the url was added.
     */
    public boolean addLink(String raw_url, int type) {
        ///ystem.out.println("addLink("+url+", "+type+")");
        if(!encountered_urls.contains(raw_url)) {
            // Add it to the urls we've seen.
            encountered_urls.add(raw_url);
            // Add it to the links for the current GURL.

            // Add it to the progress file count.
            progress.increaseFileCount();
            return true;
        }
        // Otherwise add it to the children links of the current GURL
        initial.addLink(raw_url);

        // We've seen it before. Don't count it again.
        return false;
    }

    public void callWGet() {
        // Build parameter string. Note that we never clobber, and we continue if possible.
        String command = Gatherer.config.getWGetPath() + " -nc -c ";

        // Add the destination parameter
        if(destination != null) {
            command = command + "-P " + destination + " ";
        }

        if(depth < 0) {
            // Infinite recursion
            command = command + "-r ";
        }
        else if (depth == 0) {
            // Just this page.
        }
        else if (depth > 0) {
            // Recursion to the specified depth.
            command = command + "-r -l" + depth + " ";
        }

        // Determine if we have to use a proxy.
        if(Gatherer.config.get("general.use_proxy", true)) {
            String proxy_host = Gatherer.config.getString("general.proxy_host", true);
            String proxy_port = Gatherer.config.getString("general.proxy_port", true);
            // Find out whether the user has already authenticated themselves
            String user_pass = null;
            String address = proxy_host + ":" + proxy_port;
            int count = 0;
            while(count < 3 && (user_pass = (String) Gatherer.authentications.get(address)) == null) {
                Authenticator.requestPasswordAuthentication(proxy_host, null, Integer.parseInt(proxy_port), "http://", Dictionary.get("WGet.Prompt"), "HTTP");
                count++;
            }
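            // The cached entry appears to be a single "username@password" string keyed
            // by host:port; only the part before the '@' is written into the command
            // below, and the password is not appended (the command carries a masked
            // --proxy-passwd=*** instead).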
            if(count >= 3) {
                state = STOPPED;
                return;
            }
            if(user_pass.indexOf("@") != -1) {
                // Write the use proxy command
                command = command + "-e httpproxy=" + proxy_host + ":" + proxy_port + "/ --proxy-user=\"" + user_pass.substring(0, user_pass.indexOf("@")) + "\" --proxy-passwd=*** -Y on ";
            }
            else {
                Gatherer.println("Unknown user/pass");
            }
        }

        // The user can either choose to mirror all of the page requisites...
        if(page_requisites) {
            command = command + "-p ";
        }
        // ...or not, in which case we ensure links are rewritten.
        else {
            command = command + "-k ";
        }

        if(other_hosts) {
            command = command + "-H ";
        }

        // Finally tell it the site to download.
        command = command + initial.toString();
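        // For illustration only (the exact string depends on the configuration and the
        // options chosen above), the assembled command resembles:
        //   /path/to/wget -nc -c -P <destination> -r -l3 -k http://www.example.com/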

        if(previous_state == Job.COMPLETE) {
            progress.mirrorBegun(true, true);
        }
        else {
            progress.mirrorBegun(false, true);
        }

        // Run it
        try {
            Gatherer.println("Cmd: " + command);
            Runtime rt = Runtime.getRuntime();
            Process prcs = rt.exec(command);
            InputStreamReader isr = new InputStreamReader(prcs.getErrorStream());
            BufferedReader br = new BufferedReader(isr);
            // Capture the standard error stream and search for particular occurrences.
            String line;
            boolean ignore_for_robots = false;
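
            // The parsing below keys off wget's progress messages on stderr. For the
            // older wget releases this code targets, those lines look roughly like the
            // following (illustrative only, not captured from a real run):
            //   --12:34:56--  http://www.example.com/index.html
            //              => `cache/www.example.com/index.html'
            //   12:34:57 (15.2 KB/s) - `cache/www.example.com/index.html' saved [2893/2893]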
            while ((line = br.readLine()) != null) {
                Gatherer.println(line);

                // The first magic special test is to see if we've just
                // asked for the robots.txt file. If so we ignore
                // the next add and then the next complete/error.
                if(line.lastIndexOf("robots.txt;") != -1) {
                    Gatherer.println("***** Requesting robots.txt");
                    ignore_for_robots = true;
                }
                // If the line contains "=> `", display the text as the
                // currently downloading url. Unique to addDownload.
                else if(line.lastIndexOf("=> `") != -1) {
                    if(!ignore_for_robots) {
                        // Add download
                        String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
                        // Remove the destination guff
                        if(destination != null) {
                            new_url = new_url.substring(destination.length());
                        }
                        // new_url should still begin with a '/', so this forms a full http:// url
                        addDownload("http:/" + new_url);
                    }
                }
                // If the line contains "/s) - `", set the currently
                // downloading url to "Download Complete".
                else if(line.lastIndexOf("/s) - `") != -1) {
                    String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
                    if(!ignore_for_robots) {
                        Gatherer.println("Not ignore for robots");
                        // Download complete
                        downloadComplete(current_file_downloading);
                    }
                    else {
                        Gatherer.println("Ignore for robots");
                        ignore_for_robots = false;
                    }
                }
                // The "already there" line begins "File `...". However this is only true
                // in English, so instead I looked and there are few (if any at all) other
                // messages than those above and the not-overwriting messages that use " `",
                // so we'll look for that. Note this method is not guaranteed to be unique
                // like the previous two.
                else if(line.lastIndexOf(" `") != -1) {
                    // Not overwriting
                    Gatherer.println("Already there.");
                    String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
                    // For some strange reason this won't compile
                    // src/org/greenstone/gatherer/collection/Job.java:311: cannot resolve symbol
                    // symbol  : class CAKE
                    // location: class org.greenstone.gatherer.collection.Job
                    /* ***********************************************************
                       CAKE CAKE CAKE CAKE I WANT CAKE GIVE ME CAKE NOW!!
                       *********************************************************** */
                    // Remove the destination guff
                    if(destination != null) {
                        new_url = new_url.substring(destination.length());
                    }
                    addDownload("http:/" + new_url);
                    downloadWarning();
                }
                // Any other important message starts with the time in the form hh:mm:ss
                else if(line.length() > 7) {
                    if(line.charAt(2) == ':' && line.charAt(5) == ':') {
                        if(!ignore_for_robots) {
                            Gatherer.println("Error.");
                            downloadFailed();
                        }
                        else {
                            ignore_for_robots = false;
                        }
                    }
                }
            }
            // Wait for the external process to finish before declaring the job complete.
            prcs.waitFor();
        }
        catch (Exception ioe) {
            //message(Utility.ERROR, ioe.toString());
            Gatherer.printStackTrace(ioe);
        }
        // If we've got to here and the state isn't STOPPED then the
        // job is complete.
        if(state == Job.RUNNING) {
            progress.mirrorComplete();
            previous_state = state;
            state = Job.COMPLETE;
        }
    }

    /** The most important part of the Job class, this method is
     * responsible for calling the WGet native methods used to
     * mirror the indicated url. By this stage all the variables
     * necessary should be set and we need only build up the
     * parameter string and make the call.
     */
    public void callWGetNative() {
        Vector args = new Vector();

        // Let the GProgressBar know we're starting, just in case the user hasn't
        // told us to. If the urls are being downloaded a second time and the first
        // attempt was successful (ie the previous job was complete), then the user
        // is forcing us to remirror, so the counters are reset. Note that this can
        // cause the result line to look something like this:
        //   Downloaded 12 of 12 files (8 warnings, 0 errors).
        // The warnings would be something like 'File already downloaded', but the
        // total number of files and the number successfully downloaded will be correct.
        if(previous_state == Job.COMPLETE) {
            progress.mirrorBegun(true, false);
        }
        else {
            progress.mirrorBegun(false, false);
        }

        // Parse arguments into array.
        args.add(Utility.BASE_DIR + "wget");
        args.add("-d");
        args.add("-o");
        args.add("debug.txt");

        if(destination != null) {
            args.add("-P");
            args.add(destination);
        }

        if(depth < 0) {
            // Infinite recursion
            args.add("-r");
        }
        else if (depth == 0) {
            // Just this page.
        }
        else if (depth > 0) {
            // Recursion to the specified depth.
            args.add("-r");
            args.add("-l");
            args.add(String.valueOf(depth));
        }

        if(previous_state == PAUSED) {
            args.add("-nc");
            args.add("-c");
        }

        if(proxy_user != null) {
            args.add("--proxy-user=" + proxy_user);
            args.add("--proxy-passwd=" + proxy_pass);
        }

        if(page_requisites) {
            args.add("-p");
        }

        if(quiet) {
            args.add("-q");
        }

        if(other_hosts) {
            args.add("-H");
        }

        args.add(initial.toString());
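        // For illustration (the exact contents depend on the options above), args now
        // holds something like:
        //   [<BASE_DIR>wget, -d, -o, debug.txt, -P, <destination>, -r, -l, 3, -p, http://www.example.com/]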

        Gatherer.println("Calling wget ");
        for(Enumeration e = args.elements(); e.hasMoreElements();) {
            Gatherer.println(e.nextElement() + " ");
        }
        Gatherer.println("");

        // Run home to mummy.
        int value = mummy.wget(args.size(), args.toArray(), debug);

        // If we've got to here and the state isn't STOPPED then the job is complete.
        if(state == RUNNING) {
            progress.mirrorComplete();
            previous_state = state;
            state = COMPLETE;
        }
    }

    /** Called by the WGet native code when the current download is
     * completed. In turn all download listeners are informed.
     */
    public void downloadComplete() {
        progress.downloadComplete();
        url = null;
        current_url = null;
    }

    public void downloadComplete(String current_file_downloading) {
        progress.downloadComplete();
        Gatherer.println("Current File: " + current_file_downloading);
        //WorkspaceTreeModel.refreshWebCacheMappings();
        if(Gatherer.g_man.collection_pane.workspace_tree != null) {
            FileSystemModel tree_model = (FileSystemModel) Gatherer.g_man.collection_pane.workspace_tree.getModel();
            File new_file = new File(current_file_downloading);
            File parent_file = new_file.getParentFile();
            String download_cache = Utility.getCacheDir().getAbsolutePath();
            ArrayList raw_path = new ArrayList();
            while(parent_file != null && !parent_file.getAbsolutePath().equals(download_cache)) {
                raw_path.add(0, parent_file.getName());
                parent_file = parent_file.getParentFile();
            }
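            // For example (paths illustrative only): a file cached at
            //   <cache>/www.example.com/about/index.html
            // leaves raw_path holding ["www.example.com", "about"] at this point; the
            // cache label and the tree root are prepended below to build the TreePath.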
            download_cache = null;
            // Add download cache name
            /** @todo - add to dictionary */
            raw_path.add(0, "Mirroring.Mirror_Cache");
            // And the root node
            raw_path.add(0, tree_model.getRoot());
            TreePath destination_path = new TreePath(raw_path.toArray());
            raw_path = null;
            // Retrieve the destination node
            FileNode destination_node = (FileNode) tree_model.getNode(destination_path);
            // destination_path = null;
            //FileNode new_file_node = new FileNode(new_file);

            // It suddenly occurs to me that by retrieving the destination path, we are
            // causing the potential destination node to map its children, which includes
            // the file which I am about to add. Hence I was ending up with two copies.
            ///atherer.println("Ready to insert new FileNode.");
            Gatherer.println("Model: " + tree_model);
            Gatherer.println("Destination path: " + destination_path);
            destination_node.unmap();
            ///atherer.println("Destination node: " + destination_node);
            ///atherer.println("New node: " + new_file_node);
            //SynchronizedTreeModelTools.insertNodeInto(tree_model, destination_node, new_file_node);

            //new_file_node = null;
            destination_node = null;
            tree_model = null;
        }
        url = null;
        current_url = null;
    }

    /** Called by the WGet native code when the requested download returns
     * a status code other than 200.
     */
    public void downloadFailed() {
        ///ystem.out.println("downloadFailed("+current_url+")");
        failed_urls.add(current_url); // It's the current url that's failed.
        progress.downloadFailed();
    }

    /**
     */
    public void downloadWarning() {
        progress.downloadWarning();
    }

    /**
     * @return A String representing the currently downloading url.
     */
    /* private String getCurrent() {
       return current_url;
       } */

    /**
     * @return A String representing the initial url's host (the root node
     * of the tree that we are mirroring).
     */
    public String getHost() {
        return url.getHost();
    }

    /**
     * @return The progress bar associated with this job.
     */
    public GProgressBar getProgressBar() {
        return progress;
    }

    /** Called to discover if the user wanted this thread to run or if
     * it is paused.
     * @return An int representing the current Job state.
     */
    public int getState() {
        return state;
    }

    /** Returns the current state of the stop flag for this job.
     * @return A boolean representing whether the user has requested to
     * stop.
     */
    public boolean hasSignalledStop() {
        return (state == Job.STOPPED || state == Job.PAUSED || state == Job.COMPLETE);
    }

    public void setState(int state) {
        previous_state = this.state;
        this.state = state;
    }

    /** A convenience call.
     * @return A String representing the initial url (the root node of the mirrored tree).
     */
    public String toString() {
        return initial.toString();
    }

    /** Called by the WGet native code to signal the current progress of
     * downloading.
     * @param current A long representing the number of bytes that have
     * been downloaded since the last update.
     * @param expected A long representing the total number of bytes
     * expected for this download.
     */
    public void updateProgress(long current, long expected) {
        progress.updateProgress(current, expected);
    }
}