source: trunk/gli/src/org/greenstone/gatherer/download/DownloadJob.java@ 13533

Last change on this file since 13533 was 13533, checked in by mdewsnip, 17 years ago

The GURL class is no longer used.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.2 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.download;
38
39import java.awt.event.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.tree.*;
44import org.greenstone.gatherer.Configuration;
45import org.greenstone.gatherer.DebugStream;
46import org.greenstone.gatherer.Dictionary;
47import org.greenstone.gatherer.Gatherer;
48import org.greenstone.gatherer.GAuthenticator;
49import org.greenstone.gatherer.LocalGreenstone;
50import org.greenstone.gatherer.file.WorkspaceTree;
51import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
52import org.greenstone.gatherer.util.Utility;
53import org.greenstone.gatherer.cdm.Argument;
54import org.greenstone.gatherer.collection.*;
55/**
56 * @author John Thompson, Greenstone Digital Library, University of Waikato
57 * @version 2.0
58 */
59public class DownloadJob
60 implements ActionListener {
61
62 private boolean debug;
63 private boolean higher_directories;
64 private boolean no_parents;
65 private boolean other_hosts;
66 private boolean page_requisites;
67 private boolean quiet;
68
69 private AppendLineOnlyFileDocument download_log;
70
71 private DownloadProgressBar progress;
72
73 private int depth;
74 private int previous_state;
75 private int state;
76
77 private String download_url = "";
78
79 // private String current_url;
80 // private String destination;
81 private String proxy_pass;
82 private String proxy_user;
83
84 private Vector encountered_urls;
85 private Vector failed_urls;
86 private Download download;
87 private DownloadScrollPane mummy;
88 private HashMap download_option;
89
90 public static int COMPLETE = 0;
91 public static int PAUSED = 1;
92 public static int RUNNING = 2;
93 public static int STOPPED = 3;
94
95 public static int UNKNOWN_MAX = 0;
96 public static int DEFINED_MAX = 1;
97 public static int UNDEFINED_MAX = 2;
98
99 private String mode = null;
100
101 private String proxy_url;
102
103 /**
104 */
105 public DownloadJob(Download download, String proxy_pass, String proxy_user, DownloadScrollPane mummy, String mode, String proxy_url) {
106 URL url = null;
107 int folder_hash;
108
109 this.proxy_url = proxy_url;
110
111 download_option = downloadToHashMap(download);
112 if (!mode.equals("Z3950") && !mode.equals("SRW")) {
113 Argument url_arg = (Argument)download_option.get((String)"url");
114 download_url = url_arg.getValue();
115
116 }
117 else {
118 Argument host_arg = (Argument)download_option.get((String)"host");
119 Argument port_arg = (Argument)download_option.get((String)"port");
120 download_url = host_arg.getValue() + ":" +port_arg.getValue();
121 }
122
123 folder_hash = download_url.hashCode();
124 String log_filename = Utility.getLogDir(null) + "download-"+ mode + folder_hash + ".log";
125 File log_file = new File(log_filename);
126 if(log_file.exists()) {
127 log_file.delete();
128 }
129
130 File parent_log_file = log_file.getParentFile();
131 parent_log_file.mkdirs();
132 parent_log_file = null;
133 log_file = null;
134
135 this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
136
137 this.proxy_pass = proxy_pass;
138 this.proxy_user = proxy_user;
139 this.mummy = mummy;
140 this.mode = mode;
141 this.download = download;
142
143 progress = new DownloadProgressBar(this,download_url, true);
144 encountered_urls = new Vector();
145 failed_urls = new Vector();
146
147 previous_state = STOPPED;
148 state = STOPPED;
149 }
150
151 private HashMap downloadToHashMap(Download download)
152 {
153 HashMap download_option = new HashMap();
154 ArrayList arguments = download.getArguments(true, false);
155 for(int i = 0; i < arguments.size(); i++) {
156 Argument argument = (Argument) arguments.get(i);
157 download_option.put(argument.getName(), argument);
158 }
159 return download_option;
160 }
161
162 /** Depending on which button on the progress bar was pushed,
163 * this method will affect the state of the DownloadJob and perhaps make
164 * calls to wget.class if necessary.
165 * @param event The ActionEvent fired from within the DownloadProgressBar
166 * which we must respond to.
167 */
168 public void actionPerformed(ActionEvent event) {
169 // The stop_start_button is used to alternately start or stop the
170 // job. If the current state of the job is paused then this
171 // restart is logically equivelent to a resume.
172 if(event.getSource() == progress.stop_start_button) {
173 previous_state = state;
174 if (state == RUNNING) {
175 state = STOPPED;
176 } else {
177 //previous_state = state;
178 state = RUNNING;
179 mummy.resumeThread();
180 }
181 }
182 else if (event.getSource() == progress.close_button) {
183 if(state == RUNNING) {
184 previous_state = state;
185 state = STOPPED; // do we need to do anything else to stop this?
186 }
187 mummy.deleteDownloadJob(this);
188 }
189 }
190
191
192 public void callDownload() {
193
194 ArrayList command_list = new ArrayList();
195 if (Utility.isWindows()) {
196 command_list.add(Configuration.perl_path);
197 command_list.add("-S");
198 }
199 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
200 command_list.add("-download_mode");
201 command_list.add(mode);
202 command_list.add("-cache_dir");
203 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
204
205 ArrayList all_arg = download.getArguments(true,false);
206 for(int i = 0; i < all_arg.size(); i++) {
207 Argument argument = (Argument) all_arg.get(i);
208 if(argument.isAssigned()) {
209 command_list.add("-" + argument.getName());
210 if(argument.getType() != Argument.FLAG) {
211 command_list.add(argument.getValue());
212 }
213 }
214 }
215
216 String [] cmd = (String []) command_list.toArray(new String[0]);
217 DebugStream.println("Download job, "+command_list);
218
219 if (previous_state == DownloadJob.COMPLETE) {
220 progress.mirrorBegun(true, true);
221 }
222 else {
223 progress.mirrorBegun(false, true);
224 }
225
226 try {
227 Runtime rt = Runtime.getRuntime();
228
229 String [] env = null;
230
231 Process prcs = null;
232
233
234 if (Utility.isWindows()) {
235 prcs = rt.exec(cmd);
236 }
237 else {
238 if (proxy_url != null && !proxy_url.equals("")) {
239 // Specify proxies as environment variables
240 // Need to manually specify GSDLHOME and GSDLOS also
241 env = new String[4];
242 proxy_url = proxy_url.replaceAll("http://","");
243 env[0] = "http_proxy=http://"+proxy_url;
244 env[1] = "ftp_proxy=ftp://"+proxy_url;
245 env[2] = "GSDLHOME=" + Configuration.gsdl_path;
246 env[3] = "GSDLOS=" + Gatherer.client_operating_system;
247 prcs = rt.exec(cmd, env);
248 }
249 else {
250 // Will inherit the GLI's environment, with GSDLHOME and GSDLOS set
251 prcs = rt.exec(cmd);
252 }
253 }
254
255 //System.out.println(newcmd);
256
257 InputStreamReader isr = new InputStreamReader(prcs.getErrorStream());
258 BufferedReader br = new BufferedReader(isr);
259 // Capture the standard error stream and seach for two particular occurances.
260 String line="";
261 boolean ignore_for_robots = false;
262 int max_download = DownloadJob.UNKNOWN_MAX;
263
264
265 while ((line = br.readLine()) != null && !line.trim().equals("<<Finished>>") && state != STOPPED) {
266
267 if ( max_download == DownloadJob.UNKNOWN_MAX) {
268 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
269 max_download = DownloadJob.DEFINED_MAX;
270 }
271 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
272 max_download = DownloadJob.UNDEFINED_MAX;
273 }
274 }
275 else if(max_download == DownloadJob.UNDEFINED_MAX) {
276 DebugStream.println(line);
277 download_log.appendLine(line);
278 // The first magic special test is to see if we've just
279 // asked for the robots.txt file. If so we ignore
280 // the next add and then the next complete/error.
281 if(line.lastIndexOf("robots.txt;") != -1) {
282 DebugStream.println("***** Requesting robot.txt");
283 ignore_for_robots = true;
284 }
285 // If line contains "=> `" display text as the
286 // currently downloading url. Unique to add download.
287 else if(line.lastIndexOf("=> `") != -1) {
288 if(!ignore_for_robots) {
289 // Add download
290 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
291 //addDownload("http:/" + new_url);
292 }
293 }
294 // If line contains "/s) - `" set currently
295 // downloading url to "Download Complete".
296 else if(line.lastIndexOf("/s) - `") != -1) {
297 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
298 if(!ignore_for_robots) {
299 DebugStream.println("Not ignore for robots");
300 // Download complete
301 downloadComplete(current_file_downloading);
302 }
303 else {
304 DebugStream.println("Ignore for robots");
305 ignore_for_robots = false;
306 }
307 }
308 // The already there line begins "File `..." However this
309 // is only true in english, so instead I looked and there
310 // are few (if any at all) other messages than those above
311 // and not overwriting messages that use " `" so we'll
312 // look for that. Note this method is not guarenteed to be
313 // unique like the previous two.
314 else if(line.lastIndexOf(" `") != -1) {
315 // Not Overwriting
316 DebugStream.println("Already there.");
317 String new_url =
318 line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
319 //addDownload("http:/" + new_url);
320 downloadWarning();
321 }
322 // Any other important message starts with the time in the form hh:mm:ss
323 else if(line.length() > 7) {
324 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
325 if(!ignore_for_robots) {
326 DebugStream.println("Error.");
327 downloadFailed();
328 }
329 else {
330 ignore_for_robots = false;
331 }
332 }
333 }
334 }
335 else if (max_download == DownloadJob.DEFINED_MAX) {
336 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
337 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
338 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
339 progress.resetFileCount();
340 }
341 else if (line.lastIndexOf("<<Done>>") != -1) {
342 progress.increaseFileCount();
343 }
344 else if(line.lastIndexOf("<<Done:") != -1) {
345 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
346 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
347 }
348
349 DebugStream.println(line);
350 download_log.appendLine(line);
351 }
352 else {
353 System.out.println("Error!!");
354 System.exit(-1);
355 }
356 }
357 if(state == STOPPED) {
358 isr.close();
359 prcs.destroy(); // This doesn't always work, but it's worth a try
360 }
361
362 }
363 catch (Exception ioe) {
364 //message(Utility.ERROR, ioe.toString());
365 //JTest
366 DebugStream.printStackTrace(ioe);
367 }
368 // If we've got to here and the state isn't STOPPED then the
369 // job is complete.
370 if(state == DownloadJob.RUNNING) {
371 progress.mirrorComplete();
372 previous_state = state;
373 state = DownloadJob.COMPLETE;
374
375 }
376 // refresh the workspace tree
377 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
378
379 }
380
381
382 /** Called by the WGet native code when the current download is
383 * completed. In turn all download listeners are informed.
384 */
385 public void downloadComplete() {
386 progress.downloadComplete();
387 }
388
389
390 public void downloadComplete(String current_file_downloading)
391 {
392 progress.downloadComplete();
393 DebugStream.println("Download complete: " + current_file_downloading);
394 }
395
396
397 /** Called by the WGet native code when the requested download returns
398 * a status code other than 200.
399 */
400 public void downloadFailed() {
401 // TODO!!
402 //failed_urls.add(current_url); // It is the current url that failed
403 progress.downloadFailed();
404 //DebugStream.println("Download failed: " + current_url);
405 }
406
407 /**
408 */
409 public void downloadWarning() {
410 progress.downloadWarning();
411 }
412
413 public AppendLineOnlyFileDocument getLogDocument() {
414 return download_log;
415 }
416
417 /**
418 * @return Returns the progress bar associated with this job.
419 */
420 public DownloadProgressBar getProgressBar() {
421 return progress;
422 }
423
424 /** Called to discover if the user wanted this thread to run or if
425 * it is paused.
426 * @return An int representing the current DownloadJob state.
427 */
428 public int getState() {
429 return state;
430 }
431
432 /** Returns the current state of the stop flag for this job.
433 * @return A boolean representing whether the user has requested to
434 * stop.
435 */
436 public boolean hasSignalledStop() {
437 if(state == DownloadJob.STOPPED || state == DownloadJob.PAUSED ||
438 state == DownloadJob.COMPLETE) {
439 return true;
440 }
441 return false;
442 }
443
444 public void setState(int state) {
445 previous_state = this.state;
446 this.state = state;
447 }
448
449 /** A convenience call.
450 * @return A String representing the url of the initial url (root node of the mirrored tree).
451 */
452 public String toString() {
453 return download_url;
454 }
455
456 /** Called by the WGet native code to signal the current progress of
457 * downloading.
458 * @param current A long representing the number of bytes that have
459 * been downloaded since the last update.
460 * @param expected A long representing the total number of bytes
461 * expected for this download.
462 */
463 public void updateProgress(long current, long expected) {
464 progress.updateProgress(current, expected);
465 }
466}
Note: See TracBrowser for help on using the repository browser.