source: trunk/gli/src/org/greenstone/gatherer/download/DownloadJob.java@ 13466

Last change on this file since 13466 was 13466, checked in by mdewsnip, 17 years ago

Removed some unused stuff.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.5 KB
Line 
/**
 *#########################################################################
 *
 * A component of the Gatherer application, part of the Greenstone digital
 * library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * <BR><BR>
 *
 * Author: John Thompson, Greenstone Digital Library, University of Waikato
 *
 * <BR><BR>
 *
 * Copyright (C) 1999 New Zealand Digital Library Project
 *
 * <BR><BR>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * <BR><BR>
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * <BR><BR>
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *########################################################################
 */
37package org.greenstone.gatherer.download;
38
39import java.awt.event.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.tree.*;
44import org.greenstone.gatherer.Configuration;
45import org.greenstone.gatherer.DebugStream;
46import org.greenstone.gatherer.Dictionary;
47import org.greenstone.gatherer.Gatherer;
48import org.greenstone.gatherer.GAuthenticator;
49import org.greenstone.gatherer.LocalGreenstone;
50import org.greenstone.gatherer.file.WorkspaceTree;
51import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
52import org.greenstone.gatherer.util.GURL;
53import org.greenstone.gatherer.util.Utility;
54import org.greenstone.gatherer.cdm.Argument;
55import org.greenstone.gatherer.collection.*;
56/**
57 * @author John Thompson, Greenstone Digital Library, University of Waikato
58 * @version 2.0
59 */
60public class DownloadJob
61 implements ActionListener {
62
63 private boolean debug;
64 private boolean higher_directories;
65 private boolean no_parents;
66 private boolean other_hosts;
67 private boolean page_requisites;
68 private boolean quiet;
69
70 private AppendLineOnlyFileDocument download_log;
71
72 private DownloadProgressBar progress;
73
74 private GURL initial = null;
75 private GURL url = null;
76
77
78 // private TreeModel model;
79
80 private int depth;
81 private int previous_state;
82 private int state;
83
84 private String download_url = "";
85
86 // private String current_url;
87 // private String destination;
88 private String proxy_pass;
89 private String proxy_user;
90
91 private Vector encountered_urls;
92 private Vector failed_urls;
93 private Download download;
94 private DownloadScrollPane mummy;
95 private HashMap download_option;
96
97 public static int COMPLETE = 0;
98 public static int PAUSED = 1;
99 public static int RUNNING = 2;
100 public static int STOPPED = 3;
101
102 public static int UNKNOWN_MAX = 0;
103 public static int DEFINED_MAX = 1;
104 public static int UNDEFINED_MAX = 2;
105
106 private String mode = null;
107
108 private String proxy_url;
109
110 /**
111 */
112 public DownloadJob(Download download, String proxy_pass, String proxy_user, DownloadScrollPane mummy, String mode, String proxy_url) {
113 URL url = null;
114 int folder_hash;
115
116 this.proxy_url = proxy_url;
117
118 download_option = downloadToHashMap(download);
119 if (!mode.equals("Z3950") && !mode.equals("SRW")) {
120 Argument url_arg = (Argument)download_option.get((String)"url");
121 download_url = url_arg.getValue();
122
123 }
124 else {
125 Argument host_arg = (Argument)download_option.get((String)"host");
126 Argument port_arg = (Argument)download_option.get((String)"port");
127 download_url = host_arg.getValue() + ":" +port_arg.getValue();
128 }
129
130 folder_hash = download_url.hashCode();
131 String log_filename = Utility.getLogDir(null) + "download-"+ mode + folder_hash + ".log";
132 File log_file = new File(log_filename);
133 if(log_file.exists()) {
134 log_file.delete();
135 }
136
137 File parent_log_file = log_file.getParentFile();
138 parent_log_file.mkdirs();
139 parent_log_file = null;
140 log_file = null;
141
142 this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
143
144 this.proxy_pass = proxy_pass;
145 this.proxy_user = proxy_user;
146 this.mummy = mummy;
147 this.mode = mode;
148 this.download = download;
149
150 progress = new DownloadProgressBar(this,download_url, true);
151 encountered_urls = new Vector();
152 failed_urls = new Vector();
153
154 previous_state = STOPPED;
155 state = STOPPED;
156 }
157
158 private HashMap downloadToHashMap(Download download)
159 {
160 HashMap download_option = new HashMap();
161 ArrayList arguments = download.getArguments(true, false);
162 for(int i = 0; i < arguments.size(); i++) {
163 Argument argument = (Argument) arguments.get(i);
164 download_option.put(argument.getName(), argument);
165 }
166 return download_option;
167 }
168
169 /** Depending on which button on the progress bar was pushed,
170 * this method will affect the state of the DownloadJob and perhaps make
171 * calls to wget.class if necessary.
172 * @param event The ActionEvent fired from within the DownloadProgressBar
173 * which we must respond to.
174 */
175 public void actionPerformed(ActionEvent event) {
176 // The stop_start_button is used to alternately start or stop the
177 // job. If the current state of the job is paused then this
178 // restart is logically equivelent to a resume.
179 if(event.getSource() == progress.stop_start_button) {
180 previous_state = state;
181 if (state == RUNNING) {
182 state = STOPPED;
183 } else {
184 //previous_state = state;
185 state = RUNNING;
186 mummy.resumeThread();
187 }
188 }
189 else if (event.getSource() == progress.close_button) {
190 if(state == RUNNING) {
191 previous_state = state;
192 state = STOPPED; // do we need to do anything else to stop this?
193 }
194 mummy.deleteDownloadJob(this);
195 }
196 }
197
198
199 public void callDownload() {
200
201 ArrayList command_list = new ArrayList();
202 if (Utility.isWindows()) {
203 command_list.add(Configuration.perl_path);
204 command_list.add("-S");
205 }
206 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
207 command_list.add("-download_mode");
208 command_list.add(mode);
209 command_list.add("-cache_dir");
210 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
211
212 ArrayList all_arg = download.getArguments(true,false);
213 for(int i = 0; i < all_arg.size(); i++) {
214 Argument argument = (Argument) all_arg.get(i);
215 if(argument.isAssigned()) {
216 command_list.add("-" + argument.getName());
217 if(argument.getType() != Argument.FLAG) {
218 command_list.add(argument.getValue());
219 }
220 }
221 }
222
223 String [] cmd = (String []) command_list.toArray(new String[0]);
224 DebugStream.println("Download job, "+command_list);
225
226 if (previous_state == DownloadJob.COMPLETE) {
227 progress.mirrorBegun(true, true);
228 }
229 else {
230 progress.mirrorBegun(false, true);
231 }
232
233 try {
234 Runtime rt = Runtime.getRuntime();
235
236 String [] env = null;
237
238 Process prcs = null;
239
240
241 if (Utility.isWindows()) {
242 prcs = rt.exec(cmd);
243 }
244 else {
245 if (proxy_url != null && !proxy_url.equals("")) {
246 // Specify proxies as environment variables
247 // Need to manually specify GSDLHOME and GSDLOS also
248 env = new String[4];
249 proxy_url = proxy_url.replaceAll("http://","");
250 env[0] = "http_proxy=http://"+proxy_url;
251 env[1] = "ftp_proxy=ftp://"+proxy_url;
252 env[2] = "GSDLHOME=" + Configuration.gsdl_path;
253 env[3] = "GSDLOS=" + Gatherer.client_operating_system;
254 prcs = rt.exec(cmd, env);
255 }
256 else {
257 // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set
258 prcs = rt.exec(cmd);
259 }
260 }
261
262 //System.out.println(newcmd);
263
264 InputStreamReader isr = new InputStreamReader(prcs.getErrorStream());
265 BufferedReader br = new BufferedReader(isr);
266 // Capture the standard error stream and seach for two particular occurances.
267 String line="";
268 boolean ignore_for_robots = false;
269 int max_download = DownloadJob.UNKNOWN_MAX;
270
271
272 while ((line = br.readLine()) != null && !line.trim().equals("<<Finished>>") && state != STOPPED) {
273
274 if ( max_download == DownloadJob.UNKNOWN_MAX) {
275 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
276 max_download = DownloadJob.DEFINED_MAX;
277 }
278 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
279 max_download = DownloadJob.UNDEFINED_MAX;
280 }
281 }
282 else if(max_download == DownloadJob.UNDEFINED_MAX) {
283 DebugStream.println(line);
284 download_log.appendLine(line);
285 // The first magic special test is to see if we've just
286 // asked for the robots.txt file. If so we ignore
287 // the next add and then the next complete/error.
288 if(line.lastIndexOf("robots.txt;") != -1) {
289 DebugStream.println("***** Requesting robot.txt");
290 ignore_for_robots = true;
291 }
292 // If line contains "=> `" display text as the
293 // currently downloading url. Unique to add download.
294 else if(line.lastIndexOf("=> `") != -1) {
295 if(!ignore_for_robots) {
296 // Add download
297 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
298 //addDownload("http:/" + new_url);
299 }
300 }
301 // If line contains "/s) - `" set currently
302 // downloading url to "Download Complete".
303 else if(line.lastIndexOf("/s) - `") != -1) {
304 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
305 if(!ignore_for_robots) {
306 DebugStream.println("Not ignore for robots");
307 // Download complete
308 downloadComplete(current_file_downloading);
309 }
310 else {
311 DebugStream.println("Ignore for robots");
312 ignore_for_robots = false;
313 }
314 }
315 // The already there line begins "File `..." However this
316 // is only true in english, so instead I looked and there
317 // are few (if any at all) other messages than those above
318 // and not overwriting messages that use " `" so we'll
319 // look for that. Note this method is not guarenteed to be
320 // unique like the previous two.
321 else if(line.lastIndexOf(" `") != -1) {
322 // Not Overwriting
323 DebugStream.println("Already there.");
324 String new_url =
325 line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
326 //addDownload("http:/" + new_url);
327 downloadWarning();
328 }
329 // Any other important message starts with the time in the form hh:mm:ss
330 else if(line.length() > 7) {
331 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
332 if(!ignore_for_robots) {
333 DebugStream.println("Error.");
334 downloadFailed();
335 }
336 else {
337 ignore_for_robots = false;
338 }
339 }
340 }
341 }
342 else if (max_download == DownloadJob.DEFINED_MAX) {
343 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
344 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
345 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
346 progress.resetFileCount();
347 }
348 else if (line.lastIndexOf("<<Done>>") != -1) {
349 progress.increaseFileCount();
350 }
351 else if(line.lastIndexOf("<<Done:") != -1) {
352 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
353 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
354 }
355
356 DebugStream.println(line);
357 download_log.appendLine(line);
358 }
359 else {
360 System.out.println("Error!!");
361 System.exit(-1);
362 }
363 }
364 if(state == STOPPED) {
365 isr.close();
366 prcs.destroy(); // This doesn't always work, but it's worth a try
367 }
368
369 }
370 catch (Exception ioe) {
371 //message(Utility.ERROR, ioe.toString());
372 //JTest
373 DebugStream.printStackTrace(ioe);
374 }
375 // If we've got to here and the state isn't STOPPED then the
376 // job is complete.
377 if(state == DownloadJob.RUNNING) {
378 progress.mirrorComplete();
379 previous_state = state;
380 state = DownloadJob.COMPLETE;
381
382 }
383 // refresh the workspace tree
384 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
385
386 }
387
388
389 /** Called by the WGet native code when the current download is
390 * completed. In turn all download listeners are informed.
391 */
392 public void downloadComplete() {
393 progress.downloadComplete();
394 }
395
396
397 public void downloadComplete(String current_file_downloading)
398 {
399 progress.downloadComplete();
400 DebugStream.println("Download complete: " + current_file_downloading);
401 }
402
403
404 /** Called by the WGet native code when the requested download returns
405 * a status code other than 200.
406 */
407 public void downloadFailed() {
408 // TODO!!
409 //failed_urls.add(current_url); // It is the current url that failed
410 progress.downloadFailed();
411 //DebugStream.println("Download failed: " + current_url);
412 }
413
414 /**
415 */
416 public void downloadWarning() {
417 progress.downloadWarning();
418 }
419
420
421 /**
422 * @return A String representing the initial urls host (root node
423 * of tree that we are mirroring).
424 */
425 public String getHost() {
426 return url.getHost();
427 }
428
429 public AppendLineOnlyFileDocument getLogDocument() {
430 return download_log;
431 }
432
433 /**
434 * @return Returns the progress bar associated with this job.
435 */
436 public DownloadProgressBar getProgressBar() {
437 return progress;
438 }
439
440 /** Called to discover if the user wanted this thread to run or if
441 * it is paused.
442 * @return An int representing the current DownloadJob state.
443 */
444 public int getState() {
445 return state;
446 }
447
448 /** Returns the current state of the stop flag for this job.
449 * @return A boolean representing whether the user has requested to
450 * stop.
451 */
452 public boolean hasSignalledStop() {
453 if(state == DownloadJob.STOPPED || state == DownloadJob.PAUSED ||
454 state == DownloadJob.COMPLETE) {
455 return true;
456 }
457 return false;
458 }
459
460 public void setState(int state) {
461 previous_state = this.state;
462 this.state = state;
463 }
464
465 /** A convenience call.
466 * @return A String representing the url of the initial url (root node of the mirrored tree).
467 */
468 public String toString() {
469 return download_url;
470 }
471
472 /** Called by the WGet native code to signal the current progress of
473 * downloading.
474 * @param current A long representing the number of bytes that have
475 * been downloaded since the last update.
476 * @param expected A long representing the total number of bytes
477 * expected for this download.
478 */
479 public void updateProgress(long current, long expected) {
480 progress.updateProgress(current, expected);
481 }
482}
Note: See TracBrowser for help on using the repository browser.