source: trunk/gli/src/org/greenstone/gatherer/download/DownloadJob.java@ 13032

Last change on this file since 13032 was 13016, checked in by mdewsnip, 18 years ago

Changed to use the GSDLOS argument now passed in, so downloading should now work on all OSs.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.6 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.download;
38
39import java.awt.event.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.tree.*;
44import org.greenstone.gatherer.Configuration;
45import org.greenstone.gatherer.DebugStream;
46import org.greenstone.gatherer.Dictionary;
47import org.greenstone.gatherer.Gatherer;
48import org.greenstone.gatherer.GAuthenticator;
49import org.greenstone.gatherer.LocalGreenstone;
50import org.greenstone.gatherer.file.WorkspaceTree;
51import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
52import org.greenstone.gatherer.util.GURL;
53import org.greenstone.gatherer.util.SynchronizedTreeModelTools;
54import org.greenstone.gatherer.util.Utility;
55import org.greenstone.gatherer.cdm.Argument;
56import org.greenstone.gatherer.collection.*;
57/**
58 * @author John Thompson, Greenstone Digital Library, University of Waikato
59 * @version 2.0
60 */
61public class DownloadJob
62 implements ActionListener {
63
64 private boolean debug;
65 private boolean higher_directories;
66 private boolean no_parents;
67 private boolean other_hosts;
68 private boolean page_requisites;
69 private boolean quiet;
70
71 private AppendLineOnlyFileDocument download_log;
72
73 private DownloadProgressBar progress;
74
75 private GURL initial = null;
76 private GURL url = null;
77
78
79 // private TreeModel model;
80
81 private int depth;
82 private int previous_state;
83 private int state;
84
85 private String download_url = "";
86
87 // private String current_url;
88 // private String destination;
89 private String proxy_pass;
90 private String proxy_user;
91
92 private Vector encountered_urls;
93 private Vector failed_urls;
94 private Download download;
95 private DownloadScrollPane mummy;
96 private HashMap download_option;
97
98 public static int COMPLETE = 0;
99 public static int PAUSED = 1;
100 public static int RUNNING = 2;
101 public static int STOPPED = 3;
102
103 public static int UNKNOWN_MAX = 0;
104 public static int DEFINED_MAX = 1;
105 public static int UNDEFINED_MAX = 2;
106
107 private String mode = null;
108
109 private String proxy_url;
110
111 /**
112 */
113 public DownloadJob(Download download, String proxy_pass, String proxy_user, DownloadScrollPane mummy, String mode, String proxy_url) {
114 URL url = null;
115 int folder_hash;
116
117 this.proxy_url = proxy_url;
118
119 download_option = downloadToHashMap(download);
120 if (!mode.equals("Z3950") && !mode.equals("SRW")) {
121 Argument url_arg = (Argument)download_option.get((String)"url");
122 download_url = url_arg.getValue();
123
124 }
125 else {
126 Argument host_arg = (Argument)download_option.get((String)"host");
127 Argument port_arg = (Argument)download_option.get((String)"port");
128 download_url = host_arg.getValue() + ":" +port_arg.getValue();
129 }
130
131 folder_hash = download_url.hashCode();
132 String log_filename = Utility.getLogDir(null) + "download-"+ mode + folder_hash + ".log";
133 File log_file = new File(log_filename);
134 if(log_file.exists()) {
135 log_file.delete();
136 }
137
138 File parent_log_file = log_file.getParentFile();
139 parent_log_file.mkdirs();
140 parent_log_file = null;
141 log_file = null;
142
143 this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
144
145 this.proxy_pass = proxy_pass;
146 this.proxy_user = proxy_user;
147 this.mummy = mummy;
148 this.mode = mode;
149 this.download = download;
150
151 progress = new DownloadProgressBar(this,download_url, true);
152 encountered_urls = new Vector();
153 failed_urls = new Vector();
154
155 previous_state = STOPPED;
156 state = STOPPED;
157 }
158
159 private HashMap downloadToHashMap(Download download)
160 {
161 HashMap download_option = new HashMap();
162 ArrayList arguments = download.getArguments(true, false);
163 for(int i = 0; i < arguments.size(); i++) {
164 Argument argument = (Argument) arguments.get(i);
165 download_option.put(argument.getName(), argument);
166 }
167 return download_option;
168 }
169
170 /** Depending on which button on the progress bar was pushed,
171 * this method will affect the state of the DownloadJob and perhaps make
172 * calls to wget.class if necessary.
173 * @param event The ActionEvent fired from within the DownloadProgressBar
174 * which we must respond to.
175 */
176 public void actionPerformed(ActionEvent event) {
177 // The stop_start_button is used to alternately start or stop the
178 // job. If the current state of the job is paused then this
179 // restart is logically equivelent to a resume.
180 if(event.getSource() == progress.stop_start_button) {
181 previous_state = state;
182 if (state == RUNNING) {
183 state = STOPPED;
184 } else {
185 //previous_state = state;
186 state = RUNNING;
187 mummy.resumeThread();
188 }
189 }
190 else if (event.getSource() == progress.close_button) {
191 if(state == RUNNING) {
192 previous_state = state;
193 state = STOPPED; // do we need to do anything else to stop this?
194 }
195 mummy.deleteDownloadJob(this);
196 }
197 }
198
199
200 public void callDownload() {
201
202 ArrayList command_list = new ArrayList();
203 if (Utility.isWindows()) {
204 command_list.add(Configuration.perl_path);
205 command_list.add("-S");
206 }
207 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
208 command_list.add("-download_mode");
209 command_list.add(mode);
210 command_list.add("-cache_dir");
211 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
212
213 ArrayList all_arg = download.getArguments(true,false);
214 for(int i = 0; i < all_arg.size(); i++) {
215 Argument argument = (Argument) all_arg.get(i);
216 if(argument.isAssigned()) {
217 command_list.add("-" + argument.getName());
218 if(argument.getType() != Argument.FLAG) {
219 command_list.add(argument.getValue());
220 }
221 }
222 }
223
224 String [] cmd = (String []) command_list.toArray(new String[0]);
225 DebugStream.println("Download job, "+command_list);
226
227 if (previous_state == DownloadJob.COMPLETE) {
228 progress.mirrorBegun(true, true);
229 }
230 else {
231 progress.mirrorBegun(false, true);
232 }
233
234 try {
235 Runtime rt = Runtime.getRuntime();
236
237 String [] env = null;
238
239 Process prcs = null;
240
241
242 if (Utility.isWindows()) {
243 prcs = rt.exec(cmd);
244 }
245 else {
246 if (proxy_url != null && !proxy_url.equals("")) {
247 // Specify proxies as environment variables
248 // Need to manually specify GSDLHOME and GSDLOS also
249 env = new String[4];
250 proxy_url = proxy_url.replaceAll("http://","");
251 env[0] = "http_proxy=http://"+proxy_url;
252 env[1] = "ftp_proxy=ftp://"+proxy_url;
253 env[2] = "GSDLHOME=" + Configuration.gsdl_path;
254 env[3] = "GSDLOS=" + Gatherer.client_operating_system;
255 prcs = rt.exec(cmd, env);
256 }
257 else {
258 // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set
259 prcs = rt.exec(cmd);
260 }
261 }
262
263 //System.out.println(newcmd);
264
265 InputStreamReader isr = new InputStreamReader(prcs.getErrorStream());
266 BufferedReader br = new BufferedReader(isr);
267 // Capture the standard error stream and seach for two particular occurances.
268 String line="";
269 boolean ignore_for_robots = false;
270 int max_download = DownloadJob.UNKNOWN_MAX;
271
272
273 while ((line = br.readLine()) != null && !line.trim().equals("<<Finished>>") && state != STOPPED) {
274
275 if ( max_download == DownloadJob.UNKNOWN_MAX) {
276 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
277 max_download = DownloadJob.DEFINED_MAX;
278 }
279 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
280 max_download = DownloadJob.UNDEFINED_MAX;
281 }
282 }
283 else if(max_download == DownloadJob.UNDEFINED_MAX) {
284 DebugStream.println(line);
285 download_log.appendLine(line);
286 // The first magic special test is to see if we've just
287 // asked for the robots.txt file. If so we ignore
288 // the next add and then the next complete/error.
289 if(line.lastIndexOf("robots.txt;") != -1) {
290 DebugStream.println("***** Requesting robot.txt");
291 ignore_for_robots = true;
292 }
293 // If line contains "=> `" display text as the
294 // currently downloading url. Unique to add download.
295 else if(line.lastIndexOf("=> `") != -1) {
296 if(!ignore_for_robots) {
297 // Add download
298 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
299 //addDownload("http:/" + new_url);
300 }
301 }
302 // If line contains "/s) - `" set currently
303 // downloading url to "Download Complete".
304 else if(line.lastIndexOf("/s) - `") != -1) {
305 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
306 if(!ignore_for_robots) {
307 DebugStream.println("Not ignore for robots");
308 // Download complete
309 downloadComplete(current_file_downloading);
310 }
311 else {
312 DebugStream.println("Ignore for robots");
313 ignore_for_robots = false;
314 }
315 }
316 // The already there line begins "File `..." However this
317 // is only true in english, so instead I looked and there
318 // are few (if any at all) other messages than those above
319 // and not overwriting messages that use " `" so we'll
320 // look for that. Note this method is not guarenteed to be
321 // unique like the previous two.
322 else if(line.lastIndexOf(" `") != -1) {
323 // Not Overwriting
324 DebugStream.println("Already there.");
325 String new_url =
326 line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
327 //addDownload("http:/" + new_url);
328 downloadWarning();
329 }
330 // Any other important message starts with the time in the form hh:mm:ss
331 else if(line.length() > 7) {
332 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
333 if(!ignore_for_robots) {
334 DebugStream.println("Error.");
335 downloadFailed();
336 }
337 else {
338 ignore_for_robots = false;
339 }
340 }
341 }
342 }
343 else if (max_download == DownloadJob.DEFINED_MAX) {
344 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
345 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
346 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
347 progress.resetFileCount();
348 }
349 else if (line.lastIndexOf("<<Done>>") != -1) {
350 progress.increaseFileCount();
351 }
352 else if(line.lastIndexOf("<<Done:") != -1) {
353 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
354 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
355 }
356
357 DebugStream.println(line);
358 download_log.appendLine(line);
359 }
360 else {
361 System.out.println("Error!!");
362 System.exit(-1);
363 }
364 }
365 if(state == STOPPED) {
366 isr.close();
367 prcs.destroy(); // This doesn't always work, but it's worth a try
368 }
369
370 }
371 catch (Exception ioe) {
372 //message(Utility.ERROR, ioe.toString());
373 //JTest
374 DebugStream.printStackTrace(ioe);
375 }
376 // If we've got to here and the state isn't STOPPED then the
377 // job is complete.
378 if(state == DownloadJob.RUNNING) {
379 progress.mirrorComplete();
380 previous_state = state;
381 state = DownloadJob.COMPLETE;
382
383 }
384 // refresh the workspace tree
385 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
386
387 }
388
389
390 /** Called by the WGet native code when the current download is
391 * completed. In turn all download listeners are informed.
392 */
393 public void downloadComplete() {
394 progress.downloadComplete();
395 }
396
397
398 public void downloadComplete(String current_file_downloading)
399 {
400 progress.downloadComplete();
401 DebugStream.println("Download complete: " + current_file_downloading);
402 }
403
404
405 /** Called by the WGet native code when the requested download returns
406 * a status code other than 200.
407 */
408 public void downloadFailed() {
409 // TODO!!
410 //failed_urls.add(current_url); // It is the current url that failed
411 progress.downloadFailed();
412 //DebugStream.println("Download failed: " + current_url);
413 }
414
415 /**
416 */
417 public void downloadWarning() {
418 progress.downloadWarning();
419 }
420
421
422 /**
423 * @return A String representing the initial urls host (root node
424 * of tree that we are mirroring).
425 */
426 public String getHost() {
427 return url.getHost();
428 }
429
430 public AppendLineOnlyFileDocument getLogDocument() {
431 return download_log;
432 }
433
434 /**
435 * @return Returns the progress bar associated with this job.
436 */
437 public DownloadProgressBar getProgressBar() {
438 return progress;
439 }
440
441 /** Called to discover if the user wanted this thread to run or if
442 * it is paused.
443 * @return An int representing the current DownloadJob state.
444 */
445 public int getState() {
446 return state;
447 }
448
449 /** Returns the current state of the stop flag for this job.
450 * @return A boolean representing whether the user has requested to
451 * stop.
452 */
453 public boolean hasSignalledStop() {
454 if(state == DownloadJob.STOPPED || state == DownloadJob.PAUSED ||
455 state == DownloadJob.COMPLETE) {
456 return true;
457 }
458 return false;
459 }
460
461 public void setState(int state) {
462 previous_state = this.state;
463 this.state = state;
464 }
465
466 /** A convenience call.
467 * @return A String representing the url of the initial url (root node of the mirrored tree).
468 */
469 public String toString() {
470 return download_url;
471 }
472
473 /** Called by the WGet native code to signal the current progress of
474 * downloading.
475 * @param current A long representing the number of bytes that have
476 * been downloaded since the last update.
477 * @param expected A long representing the total number of bytes
478 * expected for this download.
479 */
480 public void updateProgress(long current, long expected) {
481 progress.updateProgress(current, expected);
482 }
483}
Note: See TracBrowser for help on using the repository browser.