source: trunk/gli/src/org/greenstone/gatherer/collection/Job.java@ 7275

Last change on this file since 7275 was 6842, checked in by mdewsnip, 20 years ago

Variable name changes were needed because of the renaming of the Gather, Enrich and Design pane Java files.

  • Property svn:keywords set to Author Date Id Revision
File size: 18.8 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.collection;
38
39import java.awt.event.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.tree.*;
44import org.greenstone.gatherer.Dictionary;
45import org.greenstone.gatherer.Gatherer;
46import org.greenstone.gatherer.WGet;
47import org.greenstone.gatherer.file.FileNode;
48import org.greenstone.gatherer.file.FileSystemModel;
49import org.greenstone.gatherer.file.WorkspaceTreeModel;
50import org.greenstone.gatherer.gui.GProgressBar;
51import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
52import org.greenstone.gatherer.util.GURL;
53import org.greenstone.gatherer.util.SynchronizedTreeModelTools;
54import org.greenstone.gatherer.util.Utility;
55/**
56 * @author John Thompson, Greenstone Digital Library, University of Waikato
57 * @version 2.0
58 */
59public class Job
60 implements ActionListener {
61
62 private boolean debug;
63 private boolean higher_directories;
64 private boolean no_parents;
65 private boolean other_hosts;
66 private boolean page_requisites;
67 private boolean quiet;
68
69 private AppendLineOnlyFileDocument download_log;
70
71 private GProgressBar progress;
72
73 private GURL initial = null;
74 private GURL url = null;
75
76 // private TreeModel model;
77
78 private int depth;
79 private int previous_state;
80 private int state;
81
82 private String current_url;
83 private String destination;
84 private String proxy_pass;
85 private String proxy_user;
86
87 private Vector encountered_urls;
88 private Vector failed_urls;
89
90 private WGet mummy;
91
92 public static int COMPLETE = 0;
93 public static int PAUSED = 1;
94 public static int RUNNING = 2;
95 public static int STOPPED = 3;
96
97 /**
98 */
99 public Job(boolean debug, boolean no_parents, boolean other_hosts, boolean page_requisites, boolean quiet, URL initial, int depth, String destination, String proxy_pass, String proxy_user, WGet mummy, boolean simple) {
100 // this.model = model;
101
102 String log_filename = Utility.getLogDir(null) + "wget" + initial.hashCode() + ".log";
103 File log_file = new File(log_filename);
104 if(log_file.exists()) {
105 log_file.delete();
106 }
107 File parent_log_file = log_file.getParentFile();
108 parent_log_file.mkdirs();
109 parent_log_file = null;
110 log_file = null;
111
112 System.err.println("Creating the log file:" + log_filename);
113
114 this.debug = debug;
115 this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
116 this.no_parents = no_parents;
117 this.other_hosts = other_hosts;
118 this.page_requisites = page_requisites;
119 this.quiet = quiet;
120 this.initial = new GURL(initial);
121 this.depth = depth;
122 this.destination = destination;
123 this.proxy_pass = proxy_pass;
124 this.proxy_user = proxy_user;
125 this.mummy = mummy;
126
127 progress = new GProgressBar(this, initial.toString(), simple);
128
129 encountered_urls = new Vector();
130 failed_urls = new Vector();
131
132 previous_state = STOPPED;
133 state = STOPPED;
134 }
135
136 /** Depending on which button on the progress bar was pushed,
137 * this method will affect the state of the Job and perhaps make
138 * calls to wget.class if necessary.
139 * @param event The ActionEvent fired from within the GProgressBar
140 * which we must respond to.
141 */
142 public void actionPerformed(ActionEvent event) {
143 // The action button is used to alternately start or stop the
144 // job. If the current state of the job is paused then this
145 // restart is logically equivelent to a resume.
146 if(event.getSource() == progress.action) {
147 previous_state = state;
148 state = RUNNING;
149 mummy.resumeThread();
150 }
151 else if (event.getSource() == progress.cancel) {
152 if(state == RUNNING) {
153 previous_state = state;
154 state = STOPPED; // Should already be stopped.
155 }
156 else {
157 mummy.deleteJob(this);
158 }
159 }
160 }
161
162 /** Called by the WGet native code to inform us of a new download starting.
163 * @param raw_url The url that is being downloaded, as a String.
164 */
165 public void addDownload(String raw_url) {
166 if(!encountered_urls.contains(raw_url)) {
167 encountered_urls.add(raw_url);
168 }
169 // Regardless create a new GURL
170 current_url = raw_url;
171 url = new GURL(raw_url);
172 progress.addDownload(raw_url);
173 }
174
175 /** Used to advise the Job of a newly parsed link. Its up to Job
176 * to decide if it already knows about this url, and if not to
177 * update its progress bar.
178 * @param raw_url The url in question as a String.
179 * @param type Whether the link is an internal or external link.
180 * @return A boolean indicating if the url was added.
181 */
182 public boolean addLink(String raw_url, int type) {
183 ///ystem.out.println("addLink("+url+", "+type+")");
184 if(!encountered_urls.contains(raw_url)) {
185 // Add it to the urls we've seen.
186 encountered_urls.add(raw_url);
187 // Add it the to links for the current GURL.
188
189 // Add it to the progress file count.
190 progress.increaseFileCount();
191 return true;
192 }
193 // Regardless add it to the children links of the current GURL
194 initial.addLink(raw_url);
195
196 // We've seen it before. Don't count it again.
197 return false;
198 }
199
200 public void callWGet() {
201 // Build parameter string. Note that we never clobber, and we continue if possible
202 String command = Gatherer.config.getWGetPath() + " -nc -c ";
203
204 // Add the destination parameter
205 if(destination != null) {
206 command = command + "-P " + destination + " ";
207 }
208
209 if(depth < 0) {
210 // Infinite recursion
211 command = command + "-r ";
212 }
213 else if (depth == 0) {
214 // Just this page.
215 }
216 else if (depth > 0) {
217 // Recursion to the specified depth.
218 command = command + "-r -l" + depth + " ";
219 }
220
221 // Determine if we have to use a proxy.
222 if(Gatherer.config.get("general.use_proxy", true)) {
223 String proxy_host = Gatherer.config.getString("general.proxy_host", true);
224 String proxy_port = Gatherer.config.getString("general.proxy_port", true);
225 // Find out whether the user has already authenticated themselves
226 String user_pass = null;
227 String address = proxy_host + ":" + proxy_port;
228 int count = 0;
229 while(count < 3 && (user_pass = (String) Gatherer.authentications.get(address)) == null) {
230 Authenticator.requestPasswordAuthentication(proxy_host, null, Integer.parseInt(proxy_port), "http://", Dictionary.get("WGet.Prompt"), "HTTP");
231 count++;
232 }
233 if(count >= 3) {
234 state = STOPPED;
235 return;
236 }
237 if(user_pass.indexOf("@") != -1) {
238 // Write the use proxy command
239 command = command + "-e httpproxy=" + proxy_host + ":" + proxy_port + "/ --proxy-user=" + user_pass.substring(0, user_pass.indexOf("@")) + " --proxy-passwd=" + user_pass.substring(user_pass.indexOf("@") + 1) + " -Y on ";
240
241 }
242 else {
243 Gatherer.println("Unknown user/pass");
244 }
245 }
246
247 // The user can either choose to mirror all of the page requisites...
248 if(page_requisites) {
249 command = command + "-p ";
250 }
251 // or not. In which case we ensure links are rewritten.
252 else {
253 command = command + "-k ";
254 }
255
256 if(other_hosts) {
257 command = command + "-H ";
258 }
259
260 // Finally tell it the site to download.
261 command = command + initial.toString();
262
263 if(previous_state == Job.COMPLETE) {
264 progress.mirrorBegun(true, true);
265 }
266 else {
267 progress.mirrorBegun(false, true);
268 }
269
270 // Run it
271 try {
272 Gatherer.println("Cmd: " + command);
273 Runtime rt = Runtime.getRuntime();
274 Process prcs = rt.exec(command);
275 InputStreamReader isr = new InputStreamReader(prcs.getErrorStream());
276 BufferedReader br = new BufferedReader(isr);
277 // Capture the standard error stream and seach for two particular occurances.
278 String line;
279 boolean ignore_for_robots = false;
280 while ((line = br.readLine()) != null && state != STOPPED) {
281 Gatherer.println(line);
282 download_log.appendLine(line);
283 // The first magic special test is to see if we've just
284 // asked for the robots.txt file. If so we ignore
285 // the next add and then the next complete/error.
286 if(line.lastIndexOf("robots.txt;") != -1) {
287 Gatherer.println("***** Requesting robot.txt");
288 ignore_for_robots = true;
289 }
290 // If line contains "=> `" display text as the
291 // currently downloading url. Unique to add download.
292 else if(line.lastIndexOf("=> `") != -1) {
293 if(!ignore_for_robots) {
294 // Add download
295 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
296 // Remove the destination guff
297 if(destination != null) {
298 new_url = new_url.substring(destination.length());
299 }
300 addDownload("http:/" + new_url);
301 }
302 }
303 // If line contains "/s) - `" set currently
304 // downloading url to "Download Complete".
305 else if(line.lastIndexOf("/s) - `") != -1) {
306 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
307 if(!ignore_for_robots) {
308 Gatherer.println("Not ignore for robots");
309 // Download complete
310 downloadComplete(current_file_downloading);
311 }
312 else {
313 Gatherer.println("Ignore for robots");
314 ignore_for_robots = false;
315 }
316 }
317 // The already there line begins "File `..." However this
318 // is only true in english, so instead I looked and there
319 // are few (if any at all) other messages than those above
320 // and not overwriting messages that use " `" so we'll
321 // look for that. Note this method is not guarenteed to be
322 // unique like the previous two.
323 else if(line.lastIndexOf(" `") != -1) {
324 // Not Overwriting
325 Gatherer.println("Already there.");
326 String new_url =
327 line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
328 // For some strange reason this won't compile
329 // src/org/greenstone/gatherer/collection/Job.java:311: cannot resolve symbol
330 // symbol : class CAKE
331 // location: class org.greenstone.gatherer.collection.Job
332 /* ***********************************************************
333 CAKE CAKE CAKE CAKE I WANT CAKE GIVE ME CAKE NOW!!
334 *********************************************************** */
335 // Remove the destination guff
336 if(destination != null) {
337 new_url = new_url.substring(destination.length());
338 }
339 addDownload("http:/" + new_url);
340 downloadWarning();
341 }
342 // Any other important message starts with the time in the form hh:mm:ss
343 else if(line.length() > 7) {
344 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
345 if(!ignore_for_robots) {
346 Gatherer.println("Error.");
347 downloadFailed();
348 }
349 else {
350 ignore_for_robots = false;
351 }
352 }
353 }
354 }
355 if(state == STOPPED) {
356 isr.close();
357 prcs.destroy(); // This doesn't always work, but it's worth a try
358 }
359 else {
360 // Now display final message based on exit value
361 prcs.waitFor();
362 }
363 }
364 catch (Exception ioe) {
365 //message(Utility.ERROR, ioe.toString());
366 Gatherer.printStackTrace(ioe);
367 }
368 // If we've got to here and the state isn't STOPPED then the
369 // job is complete.
370 if(state == Job.RUNNING) {
371 progress.mirrorComplete();
372 previous_state = state;
373 state = Job.COMPLETE;
374 }
375 }
376
377 /** The most important part of the Job class, this method is
378 * responsible for calling the WGet native methods used to
379 * mirror the indicated url. By this stage all the variables
380 * necessary should be set and we need only build up the
381 * parameter string and make the call.
382 */
383 public void callWGetNative() {
384 Vector args = new Vector();
385
386 // Let the GProgressBar know we're starting, just in case
387 // the user hasn't told us to. If this is the second time the
388 // urls downloaded and the first attempt was successful (ie
389 // the previous job was complete), then we have the case where
390 // the user is forcing us to remirror. Reset all the values etc
391 // if this is the case then reset the variables.
392 // Note that this can cause the result line to look something
393 // like this.
394 // Downloaded 12 of 12 files (8 warnings, 0 errors).
395 // The warnings would be something like, 'File already downloaded'
396 // but the total number of files and the file successfully
397 // downloaded will be correct.
398 if(previous_state == Job.COMPLETE) {
399 progress.mirrorBegun(true, false);
400 }
401 else {
402 progress.mirrorBegun(false, false);
403 }
404
405 // Parse arguments into array.
406 args.add(Utility.BASE_DIR + "wget");
407 args.add("-d");
408 args.add("-o");
409 args.add("debug.txt");
410
411 if(destination != null) {
412 args.add("-P");
413 args.add(destination);
414 }
415
416 if(depth < 0) {
417 // Infinite recursion
418 args.add("-r");
419 }
420 else if (depth == 0) {
421 // Just this page.
422 }
423 else if (depth > 0) {
424 // Recursion to the specified depth.
425 args.add("-r");
426 args.add("-l");
427 args.add("" + depth + ""); // Hacky
428 }
429
430 if(previous_state == PAUSED) {
431 args.add("-nc");
432 args.add("-c");
433 }
434
435 if(proxy_user != null) {
436 args.add("--proxy-user=" + proxy_user);
437 args.add("--proxy-passwd=" + proxy_pass);
438 }
439
440 if(page_requisites) {
441 args.add("-p");
442 }
443
444 if(quiet) {
445 args.add("-q");
446 }
447
448 if(other_hosts) {
449 args.add("-H");
450 }
451
452 args.add(initial.toString());
453
454 Gatherer.println("Calling wget ");
455 for(Enumeration e = args.elements(); e.hasMoreElements();) {
456 Gatherer.println(e.nextElement() + " ");
457 }
458 Gatherer.println("");
459
460 // Run home to mummy.
461 int value = mummy.wget(args.size(), args.toArray(), debug);
462
463 // If we've got to here and the state isn't STOPPED then the job is complete.
464 if(state == RUNNING) {
465 progress.mirrorComplete();
466 previous_state = state;
467 state = COMPLETE;
468 }
469 }
470
471 /** Called by the WGet native code when the current download is
472 * completed. In turn all download listeners are informed.
473 */
474 public void downloadComplete() {
475 progress.downloadComplete();
476 url = null;
477 current_url = null;
478 }
479
    /** Called when the download of a named file has completed. Updates the
     * progress bar and, if the workspace tree is showing, unmaps the tree
     * node for the cache directory the file arrived under so that node
     * re-reads its children (and hence shows the new file) on next access.
     * @param current_file_downloading Absolute path of the file just downloaded.
     */
    public void downloadComplete(String current_file_downloading) {
        progress.downloadComplete();
        Gatherer.println("Current File: " + current_file_downloading);
        //WorkspaceTreeModel.refreshWebCacheMappings();
        if(Gatherer.g_man.gather_pane.workspace_tree != null) {
            FileSystemModel tree_model = (FileSystemModel) Gatherer.g_man.gather_pane.workspace_tree.getModel();
            File new_file = new File(current_file_downloading);
            File parent_file = new_file.getParentFile();
            String download_cache = Utility.getCacheDir().getAbsolutePath();
            // Walk upwards from the file's parent, collecting directory names
            // until the download cache root is reached, to build a tree path.
            ArrayList raw_path = new ArrayList();
            while(parent_file != null && !parent_file.getAbsolutePath().equals(download_cache)) {
                raw_path.add(0, parent_file.getName());
                parent_file = parent_file.getParentFile();
            }
            download_cache = null;
            // Add download cache name
            /** @todo - add to dictionary */
            raw_path.add(0, "Mirroring.Mirror_Cache");
            // And the root node
            raw_path.add(0, tree_model.getRoot());
            TreePath destination_path = new TreePath(raw_path.toArray());
            raw_path = null;
            // Retrieve the destination node
            FileNode destination_node = (FileNode) tree_model.getNode(destination_path);
            // destination_path = null;
            //FileNode new_file_node = new FileNode(new_file);

            // It suddenly occurs to me that by retrieving the destination path, we are causing the potential destination node to map its children which includes the file which I am about to add. Hence I was ending up with two copies.
            ///atherer.println("Ready to insert new FileNode.");
            Gatherer.println("Model: " + tree_model);
            Gatherer.println("Destination path: " + destination_path);
            // Unmapping forces the node to re-map its children on next access,
            // which picks up the newly downloaded file without inserting it twice.
            destination_node.unmap();
            ///atherer.println("Destination node: " + destination_node);
            ///atherer.println("New node: " + new_file_node);
            //SynchronizedTreeModelTools.insertNodeInto(tree_model, destination_node, new_file_node);

            //new_file_node = null;
            destination_node = null;
            tree_model = null;
        }
        url = null;
        current_url = null;
    }
523
524 /** Called by the WGet native code when the requested download returns
525 * a status code other than 200.
526 */
527 public void downloadFailed() {
528 ///ystem.out.println("downloadFailed("+current_url+")");
529 failed_urls.add(current_url); // Its the current url thats failed.
530 progress.downloadFailed();
531 }
532
533 /**
534 */
535 public void downloadWarning() {
536 progress.downloadWarning();
537 }
538
539 /**
540 * @return A String representing the currently downloading url.
541 */
542 /* private String getCurrent() {
543 return current_url;
544 } */
545
546 /**
547 * @return A String representing the initial urls host (root node
548 * of tree that we are mirroring).
549 */
550 public String getHost() {
551 return url.getHost();
552 }
553
554 public AppendLineOnlyFileDocument getLogDocument() {
555 return download_log;
556 }
557
558 /**
559 * @return Returns the progress bar associated with this job.
560 */
561 public GProgressBar getProgressBar() {
562 return progress;
563 }
564
565 /** Called to discover if the user wanted this thread to run or if
566 * it is paused.
567 * @return An int representing the current Job state.
568 */
569 public int getState() {
570 return state;
571 }
572
573 /** Returns the current state of the stop flag for this job.
574 * @return A boolean representing whether the user has requested to
575 * stop.
576 */
577 public boolean hasSignalledStop() {
578 if(state == Job.STOPPED || state == Job.PAUSED ||
579 state == Job.COMPLETE) {
580 return true;
581 }
582 return false;
583 }
584
585 public void setState(int state) {
586 previous_state = this.state;
587 this.state = state;
588 }
589
590 /** A convinence call.
591 * @return A String representing the url of the initial url (root node of the mirrored tree).
592 */
593 public String toString() {
594 return initial.toString();
595 }
596
597 /** Called by the WGet native code to signal the current progress of
598 * downloading.
599 * @param current A long representing the number of bytes that have
600 * been downloaded since the last update.
601 * @param expected A long representing the total number of bytes
602 * expected for this download.
603 */
604 public void updateProgress(long current, long expected) {
605 progress.updateProgress(current, expected);
606 }
607}
Note: See TracBrowser for help on using the repository browser.