source: trunk/gli/src/org/greenstone/gatherer/download/DownloadJob.java@ 12529

Last change on this file since 12529 was 12529, checked in by mdewsnip, 18 years ago

Moved the cdm/download directory to download (it's got nothing to do with the cdm).

  • Property svn:keywords set to Author Date Id Revision
File size: 14.8 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.download;
38
39import java.awt.event.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.tree.*;
44import org.greenstone.gatherer.Configuration;
45import org.greenstone.gatherer.DebugStream;
46import org.greenstone.gatherer.Dictionary;
47import org.greenstone.gatherer.Gatherer;
48import org.greenstone.gatherer.GAuthenticator;
49import org.greenstone.gatherer.LocalGreenstone;
50import org.greenstone.gatherer.file.WorkspaceTree;
51import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
52import org.greenstone.gatherer.util.GURL;
53import org.greenstone.gatherer.util.SynchronizedTreeModelTools;
54import org.greenstone.gatherer.util.Utility;
55import org.greenstone.gatherer.cdm.Argument;
56import org.greenstone.gatherer.collection.*;
57/**
58 * @author John Thompson, Greenstone Digital Library, University of Waikato
59 * @version 2.0
60 */
61public class DownloadJob
62 implements ActionListener {
63
64 private boolean debug;
65 private boolean higher_directories;
66 private boolean no_parents;
67 private boolean other_hosts;
68 private boolean page_requisites;
69 private boolean quiet;
70
71 private AppendLineOnlyFileDocument download_log;
72
73 private DownloadProgressBar progress;
74
75 private GURL initial = null;
76 private GURL url = null;
77
78
79 // private TreeModel model;
80
81 private int depth;
82 private int previous_state;
83 private int state;
84
85 private String download_url = "";
86
87 // private String current_url;
88 // private String destination;
89 private String proxy_pass;
90 private String proxy_user;
91
92 private Vector encountered_urls;
93 private Vector failed_urls;
94 private Download download;
95 private DownloadScrollPane mummy;
96 private HashMap download_option;
97
98 public static int COMPLETE = 0;
99 public static int PAUSED = 1;
100 public static int RUNNING = 2;
101 public static int STOPPED = 3;
102
103 public static int UNKNOWN_MAX = 0;
104 public static int DEFINED_MAX = 1;
105 public static int UNDEFINED_MAX = 2;
106
107 private String mode = null;
108
109 private String proxy_url;
110
111 /**
112 */
113 public DownloadJob(Download download, String proxy_pass, String proxy_user, DownloadScrollPane mummy, String mode, String proxy_url) {
114 URL url = null;
115 int folder_hash;
116
117 this.proxy_url = proxy_url;
118
119 download_option = downloadToHashMap(download);
120 if (!mode.equals("Z3950") && !mode.equals("SRW")) {
121 Argument url_arg = (Argument)download_option.get((String)"url");
122 download_url = url_arg.getValue();
123
124 }
125 else {
126 Argument host_arg = (Argument)download_option.get((String)"host");
127 Argument port_arg = (Argument)download_option.get((String)"port");
128 download_url = host_arg.getValue() + ":" +port_arg.getValue();
129 }
130
131 folder_hash = download_url.hashCode();
132 String log_filename = Utility.getLogDir(null) + "download-"+ mode + folder_hash + ".log";
133 File log_file = new File(log_filename);
134 if(log_file.exists()) {
135 log_file.delete();
136 }
137
138 File parent_log_file = log_file.getParentFile();
139 parent_log_file.mkdirs();
140 parent_log_file = null;
141 log_file = null;
142
143 this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
144
145 this.proxy_pass = proxy_pass;
146 this.proxy_user = proxy_user;
147 this.mummy = mummy;
148 this.mode = mode;
149 this.download = download;
150
151 progress = new DownloadProgressBar(this,download_url, true);
152 encountered_urls = new Vector();
153 failed_urls = new Vector();
154
155 previous_state = STOPPED;
156 state = STOPPED;
157 }
158
159 private HashMap downloadToHashMap(Download download)
160 {
161 HashMap download_option = new HashMap();
162 ArrayList arguments = download.getArguments(true, false);
163 for(int i = 0; i < arguments.size(); i++) {
164 Argument argument = (Argument) arguments.get(i);
165 download_option.put(argument.getName(), argument);
166 }
167 return download_option;
168 }
169
170 /** Depending on which button on the progress bar was pushed,
171 * this method will affect the state of the DownloadJob and perhaps make
172 * calls to wget.class if necessary.
173 * @param event The ActionEvent fired from within the DownloadProgressBar
174 * which we must respond to.
175 */
176 public void actionPerformed(ActionEvent event) {
177 // The stop_start_button is used to alternately start or stop the
178 // job. If the current state of the job is paused then this
179 // restart is logically equivelent to a resume.
180 if(event.getSource() == progress.stop_start_button) {
181 previous_state = state;
182 if (state == RUNNING) {
183 state = STOPPED;
184 } else {
185 //previous_state = state;
186 state = RUNNING;
187 mummy.resumeThread();
188 }
189 }
190 else if (event.getSource() == progress.close_button) {
191 if(state == RUNNING) {
192 previous_state = state;
193 state = STOPPED; // do we need to do anything else to stop this?
194 }
195 mummy.deleteDownloadJob(this);
196 }
197 }
198
199
200 public void callDownload() {
201
202 ArrayList command_list = new ArrayList();
203 if (Utility.isWindows()) {
204 command_list.add(Configuration.perl_path);
205 command_list.add("-S");
206 }
207 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
208 command_list.add("-download_mode");
209 command_list.add(mode);
210 command_list.add("-cache_dir");
211
212 String user_home = System.getProperty("user.home");
213 if (Utility.isWindows()){
214 command_list.add(user_home + File.separator+"gli"+File.separator + "cache");
215 }
216 else{
217 command_list.add(user_home + File.separator+".gli"+File.separator +"cache");
218 }
219
220 ArrayList all_arg = download.getArguments(true,false);
221 for(int i = 0; i < all_arg.size(); i++) {
222 Argument argument = (Argument) all_arg.get(i);
223 if(argument.isAssigned()) {
224 command_list.add("-" + argument.getName());
225 if(argument.getType() != Argument.FLAG) {
226 command_list.add(argument.getValue());
227 }
228 }
229 }
230
231 String [] cmd = (String []) command_list.toArray(new String[0]);
232 DebugStream.println("Download job, "+command_list);
233
234 if (previous_state == DownloadJob.COMPLETE) {
235 progress.mirrorBegun(true, true);
236 }
237 else {
238 progress.mirrorBegun(false, true);
239 }
240
241 try {
242 Runtime rt = Runtime.getRuntime();
243
244 String [] env = null;
245
246 Process prcs = null;
247
248
249 if (Utility.isWindows()) {
250 prcs = rt.exec(cmd);
251 }
252 else {
253 if (proxy_url != null && !proxy_url.equals("")) {
254 env = new String[4];
255 proxy_url = proxy_url.replaceAll("http://","");
256 env[0] = "http_proxy=http://"+proxy_url;
257 env[1] = "ftp_proxy=ftp://"+proxy_url;
258 env[2] = "GSDLHOME="+Configuration.gsdl_path;
259 env[3] = "GSDLOS="+System.getProperty("os.name");
260 prcs = rt.exec(cmd,env);
261 }
262 else {
263 env = new String[2];
264
265 env[0] = "GSDLHOME="+Configuration.gsdl_path;
266 env[1] = "GSDLOS="+System.getProperty("os.name");
267 prcs = rt.exec(cmd,env);
268 }
269
270 }
271
272 //System.out.println(newcmd);
273
274 InputStreamReader isr = new InputStreamReader(prcs.getErrorStream());
275 BufferedReader br = new BufferedReader(isr);
276 // Capture the standard error stream and seach for two particular occurances.
277 String line="";
278 boolean ignore_for_robots = false;
279 int max_download = DownloadJob.UNKNOWN_MAX;
280
281
282 while ((line = br.readLine()) != null && !line.trim().equals("<<Finished>>") && state != STOPPED) {
283
284 if ( max_download == DownloadJob.UNKNOWN_MAX) {
285 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
286 max_download = DownloadJob.DEFINED_MAX;
287 }
288 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
289 max_download = DownloadJob.UNDEFINED_MAX;
290 }
291 }
292 else if(max_download == DownloadJob.UNDEFINED_MAX) {
293 DebugStream.println(line);
294 download_log.appendLine(line);
295 // The first magic special test is to see if we've just
296 // asked for the robots.txt file. If so we ignore
297 // the next add and then the next complete/error.
298 if(line.lastIndexOf("robots.txt;") != -1) {
299 DebugStream.println("***** Requesting robot.txt");
300 ignore_for_robots = true;
301 }
302 // If line contains "=> `" display text as the
303 // currently downloading url. Unique to add download.
304 else if(line.lastIndexOf("=> `") != -1) {
305 if(!ignore_for_robots) {
306 // Add download
307 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
308 //addDownload("http:/" + new_url);
309 }
310 }
311 // If line contains "/s) - `" set currently
312 // downloading url to "Download Complete".
313 else if(line.lastIndexOf("/s) - `") != -1) {
314 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
315 if(!ignore_for_robots) {
316 DebugStream.println("Not ignore for robots");
317 // Download complete
318 downloadComplete(current_file_downloading);
319 }
320 else {
321 DebugStream.println("Ignore for robots");
322 ignore_for_robots = false;
323 }
324 }
325 // The already there line begins "File `..." However this
326 // is only true in english, so instead I looked and there
327 // are few (if any at all) other messages than those above
328 // and not overwriting messages that use " `" so we'll
329 // look for that. Note this method is not guarenteed to be
330 // unique like the previous two.
331 else if(line.lastIndexOf(" `") != -1) {
332 // Not Overwriting
333 DebugStream.println("Already there.");
334 String new_url =
335 line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
336 //addDownload("http:/" + new_url);
337 downloadWarning();
338 }
339 // Any other important message starts with the time in the form hh:mm:ss
340 else if(line.length() > 7) {
341 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
342 if(!ignore_for_robots) {
343 DebugStream.println("Error.");
344 downloadFailed();
345 }
346 else {
347 ignore_for_robots = false;
348 }
349 }
350 }
351 }
352 else if (max_download == DownloadJob.DEFINED_MAX) {
353 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
354 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
355 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
356 progress.resetFileCount();
357 }
358 else if (line.lastIndexOf("<<Done>>") != -1) {
359 progress.increaseFileCount();
360 }
361 else if(line.lastIndexOf("<<Done:") != -1) {
362 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
363 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
364 }
365
366 DebugStream.println(line);
367 download_log.appendLine(line);
368 }
369 else {
370 System.out.println("Error!!");
371 System.exit(-1);
372 }
373 }
374 if(state == STOPPED) {
375 isr.close();
376 prcs.destroy(); // This doesn't always work, but it's worth a try
377 }
378
379 }
380 catch (Exception ioe) {
381 //message(Utility.ERROR, ioe.toString());
382 //JTest
383 DebugStream.printStackTrace(ioe);
384 }
385 // If we've got to here and the state isn't STOPPED then the
386 // job is complete.
387 if(state == DownloadJob.RUNNING) {
388 progress.mirrorComplete();
389 previous_state = state;
390 state = DownloadJob.COMPLETE;
391
392 }
393 // refresh the workspace tree
394 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
395
396 }
397
398
399 /** Called by the WGet native code when the current download is
400 * completed. In turn all download listeners are informed.
401 */
402 public void downloadComplete() {
403 progress.downloadComplete();
404 }
405
406
407 public void downloadComplete(String current_file_downloading)
408 {
409 progress.downloadComplete();
410 DebugStream.println("Download complete: " + current_file_downloading);
411 }
412
413
414 /** Called by the WGet native code when the requested download returns
415 * a status code other than 200.
416 */
417 public void downloadFailed() {
418 // TODO!!
419 //failed_urls.add(current_url); // It is the current url that failed
420 progress.downloadFailed();
421 //DebugStream.println("Download failed: " + current_url);
422 }
423
424 /**
425 */
426 public void downloadWarning() {
427 progress.downloadWarning();
428 }
429
430
431 /**
432 * @return A String representing the initial urls host (root node
433 * of tree that we are mirroring).
434 */
435 public String getHost() {
436 return url.getHost();
437 }
438
439 public AppendLineOnlyFileDocument getLogDocument() {
440 return download_log;
441 }
442
443 /**
444 * @return Returns the progress bar associated with this job.
445 */
446 public DownloadProgressBar getProgressBar() {
447 return progress;
448 }
449
450 /** Called to discover if the user wanted this thread to run or if
451 * it is paused.
452 * @return An int representing the current DownloadJob state.
453 */
454 public int getState() {
455 return state;
456 }
457
458 /** Returns the current state of the stop flag for this job.
459 * @return A boolean representing whether the user has requested to
460 * stop.
461 */
462 public boolean hasSignalledStop() {
463 if(state == DownloadJob.STOPPED || state == DownloadJob.PAUSED ||
464 state == DownloadJob.COMPLETE) {
465 return true;
466 }
467 return false;
468 }
469
470 public void setState(int state) {
471 previous_state = this.state;
472 this.state = state;
473 }
474
475 /** A convenience call.
476 * @return A String representing the url of the initial url (root node of the mirrored tree).
477 */
478 public String toString() {
479 return download_url;
480 }
481
482 /** Called by the WGet native code to signal the current progress of
483 * downloading.
484 * @param current A long representing the number of bytes that have
485 * been downloaded since the last update.
486 * @param expected A long representing the total number of bytes
487 * expected for this download.
488 */
489 public void updateProgress(long current, long expected) {
490 progress.updateProgress(current, expected);
491 }
492}
Note: See TracBrowser for help on using the repository browser.