source: trunk/gli/src/org/greenstone/gatherer/download/DownloadJob.java@ 12588

Last change on this file since 12588 was 12588, checked in by kjdon, 18 years ago

use getGLIUserCacheDirectoryPath to get cache_dir instead of working it out from scratch

  • Property svn:keywords set to Author Date Id Revision
File size: 14.6 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.download;
38
39import java.awt.event.*;
40import java.io.*;
41import java.net.*;
42import java.util.*;
43import javax.swing.tree.*;
44import org.greenstone.gatherer.Configuration;
45import org.greenstone.gatherer.DebugStream;
46import org.greenstone.gatherer.Dictionary;
47import org.greenstone.gatherer.Gatherer;
48import org.greenstone.gatherer.GAuthenticator;
49import org.greenstone.gatherer.LocalGreenstone;
50import org.greenstone.gatherer.file.WorkspaceTree;
51import org.greenstone.gatherer.util.AppendLineOnlyFileDocument;
52import org.greenstone.gatherer.util.GURL;
53import org.greenstone.gatherer.util.SynchronizedTreeModelTools;
54import org.greenstone.gatherer.util.Utility;
55import org.greenstone.gatherer.cdm.Argument;
56import org.greenstone.gatherer.collection.*;
57/**
58 * @author John Thompson, Greenstone Digital Library, University of Waikato
59 * @version 2.0
60 */
61public class DownloadJob
62 implements ActionListener {
63
64 private boolean debug;
65 private boolean higher_directories;
66 private boolean no_parents;
67 private boolean other_hosts;
68 private boolean page_requisites;
69 private boolean quiet;
70
71 private AppendLineOnlyFileDocument download_log;
72
73 private DownloadProgressBar progress;
74
75 private GURL initial = null;
76 private GURL url = null;
77
78
79 // private TreeModel model;
80
81 private int depth;
82 private int previous_state;
83 private int state;
84
85 private String download_url = "";
86
87 // private String current_url;
88 // private String destination;
89 private String proxy_pass;
90 private String proxy_user;
91
92 private Vector encountered_urls;
93 private Vector failed_urls;
94 private Download download;
95 private DownloadScrollPane mummy;
96 private HashMap download_option;
97
98 public static int COMPLETE = 0;
99 public static int PAUSED = 1;
100 public static int RUNNING = 2;
101 public static int STOPPED = 3;
102
103 public static int UNKNOWN_MAX = 0;
104 public static int DEFINED_MAX = 1;
105 public static int UNDEFINED_MAX = 2;
106
107 private String mode = null;
108
109 private String proxy_url;
110
111 /**
112 */
113 public DownloadJob(Download download, String proxy_pass, String proxy_user, DownloadScrollPane mummy, String mode, String proxy_url) {
114 URL url = null;
115 int folder_hash;
116
117 this.proxy_url = proxy_url;
118
119 download_option = downloadToHashMap(download);
120 if (!mode.equals("Z3950") && !mode.equals("SRW")) {
121 Argument url_arg = (Argument)download_option.get((String)"url");
122 download_url = url_arg.getValue();
123
124 }
125 else {
126 Argument host_arg = (Argument)download_option.get((String)"host");
127 Argument port_arg = (Argument)download_option.get((String)"port");
128 download_url = host_arg.getValue() + ":" +port_arg.getValue();
129 }
130
131 folder_hash = download_url.hashCode();
132 String log_filename = Utility.getLogDir(null) + "download-"+ mode + folder_hash + ".log";
133 File log_file = new File(log_filename);
134 if(log_file.exists()) {
135 log_file.delete();
136 }
137
138 File parent_log_file = log_file.getParentFile();
139 parent_log_file.mkdirs();
140 parent_log_file = null;
141 log_file = null;
142
143 this.download_log = new AppendLineOnlyFileDocument(log_filename, false);
144
145 this.proxy_pass = proxy_pass;
146 this.proxy_user = proxy_user;
147 this.mummy = mummy;
148 this.mode = mode;
149 this.download = download;
150
151 progress = new DownloadProgressBar(this,download_url, true);
152 encountered_urls = new Vector();
153 failed_urls = new Vector();
154
155 previous_state = STOPPED;
156 state = STOPPED;
157 }
158
159 private HashMap downloadToHashMap(Download download)
160 {
161 HashMap download_option = new HashMap();
162 ArrayList arguments = download.getArguments(true, false);
163 for(int i = 0; i < arguments.size(); i++) {
164 Argument argument = (Argument) arguments.get(i);
165 download_option.put(argument.getName(), argument);
166 }
167 return download_option;
168 }
169
170 /** Depending on which button on the progress bar was pushed,
171 * this method will affect the state of the DownloadJob and perhaps make
172 * calls to wget.class if necessary.
173 * @param event The ActionEvent fired from within the DownloadProgressBar
174 * which we must respond to.
175 */
176 public void actionPerformed(ActionEvent event) {
177 // The stop_start_button is used to alternately start or stop the
178 // job. If the current state of the job is paused then this
179 // restart is logically equivelent to a resume.
180 if(event.getSource() == progress.stop_start_button) {
181 previous_state = state;
182 if (state == RUNNING) {
183 state = STOPPED;
184 } else {
185 //previous_state = state;
186 state = RUNNING;
187 mummy.resumeThread();
188 }
189 }
190 else if (event.getSource() == progress.close_button) {
191 if(state == RUNNING) {
192 previous_state = state;
193 state = STOPPED; // do we need to do anything else to stop this?
194 }
195 mummy.deleteDownloadJob(this);
196 }
197 }
198
199
200 public void callDownload() {
201
202 ArrayList command_list = new ArrayList();
203 if (Utility.isWindows()) {
204 command_list.add(Configuration.perl_path);
205 command_list.add("-S");
206 }
207 command_list.add(LocalGreenstone.getBinScriptDirectoryPath()+"downloadfrom.pl");
208 command_list.add("-download_mode");
209 command_list.add(mode);
210 command_list.add("-cache_dir");
211 command_list.add(Gatherer.getGLIUserCacheDirectoryPath());
212
213 ArrayList all_arg = download.getArguments(true,false);
214 for(int i = 0; i < all_arg.size(); i++) {
215 Argument argument = (Argument) all_arg.get(i);
216 if(argument.isAssigned()) {
217 command_list.add("-" + argument.getName());
218 if(argument.getType() != Argument.FLAG) {
219 command_list.add(argument.getValue());
220 }
221 }
222 }
223
224 String [] cmd = (String []) command_list.toArray(new String[0]);
225 DebugStream.println("Download job, "+command_list);
226
227 if (previous_state == DownloadJob.COMPLETE) {
228 progress.mirrorBegun(true, true);
229 }
230 else {
231 progress.mirrorBegun(false, true);
232 }
233
234 try {
235 Runtime rt = Runtime.getRuntime();
236
237 String [] env = null;
238
239 Process prcs = null;
240
241
242 if (Utility.isWindows()) {
243 prcs = rt.exec(cmd);
244 }
245 else {
246 if (proxy_url != null && !proxy_url.equals("")) {
247 env = new String[4];
248 proxy_url = proxy_url.replaceAll("http://","");
249 env[0] = "http_proxy=http://"+proxy_url;
250 env[1] = "ftp_proxy=ftp://"+proxy_url;
251 env[2] = "GSDLHOME="+Configuration.gsdl_path;
252 env[3] = "GSDLOS="+System.getProperty("os.name");
253 prcs = rt.exec(cmd,env);
254 }
255 else {
256 env = new String[2];
257
258 env[0] = "GSDLHOME="+Configuration.gsdl_path;
259 env[1] = "GSDLOS="+System.getProperty("os.name");
260 prcs = rt.exec(cmd,env);
261 }
262
263 }
264
265 //System.out.println(newcmd);
266
267 InputStreamReader isr = new InputStreamReader(prcs.getErrorStream());
268 BufferedReader br = new BufferedReader(isr);
269 // Capture the standard error stream and seach for two particular occurances.
270 String line="";
271 boolean ignore_for_robots = false;
272 int max_download = DownloadJob.UNKNOWN_MAX;
273
274
275 while ((line = br.readLine()) != null && !line.trim().equals("<<Finished>>") && state != STOPPED) {
276
277 if ( max_download == DownloadJob.UNKNOWN_MAX) {
278 if(line.lastIndexOf("<<Defined Maximum>>") != -1) {
279 max_download = DownloadJob.DEFINED_MAX;
280 }
281 else if (line.lastIndexOf("<<Undefined Maximum>>") != -1) {
282 max_download = DownloadJob.UNDEFINED_MAX;
283 }
284 }
285 else if(max_download == DownloadJob.UNDEFINED_MAX) {
286 DebugStream.println(line);
287 download_log.appendLine(line);
288 // The first magic special test is to see if we've just
289 // asked for the robots.txt file. If so we ignore
290 // the next add and then the next complete/error.
291 if(line.lastIndexOf("robots.txt;") != -1) {
292 DebugStream.println("***** Requesting robot.txt");
293 ignore_for_robots = true;
294 }
295 // If line contains "=> `" display text as the
296 // currently downloading url. Unique to add download.
297 else if(line.lastIndexOf("=> `") != -1) {
298 if(!ignore_for_robots) {
299 // Add download
300 String new_url = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
301 //addDownload("http:/" + new_url);
302 }
303 }
304 // If line contains "/s) - `" set currently
305 // downloading url to "Download Complete".
306 else if(line.lastIndexOf("/s) - `") != -1) {
307 String current_file_downloading = line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
308 if(!ignore_for_robots) {
309 DebugStream.println("Not ignore for robots");
310 // Download complete
311 downloadComplete(current_file_downloading);
312 }
313 else {
314 DebugStream.println("Ignore for robots");
315 ignore_for_robots = false;
316 }
317 }
318 // The already there line begins "File `..." However this
319 // is only true in english, so instead I looked and there
320 // are few (if any at all) other messages than those above
321 // and not overwriting messages that use " `" so we'll
322 // look for that. Note this method is not guarenteed to be
323 // unique like the previous two.
324 else if(line.lastIndexOf(" `") != -1) {
325 // Not Overwriting
326 DebugStream.println("Already there.");
327 String new_url =
328 line.substring(line.indexOf("`") + 1, line.lastIndexOf("'"));
329 //addDownload("http:/" + new_url);
330 downloadWarning();
331 }
332 // Any other important message starts with the time in the form hh:mm:ss
333 else if(line.length() > 7) {
334 if(line.charAt(2) == ':' && line.charAt(5) == ':') {
335 if(!ignore_for_robots) {
336 DebugStream.println("Error.");
337 downloadFailed();
338 }
339 else {
340 ignore_for_robots = false;
341 }
342 }
343 }
344 }
345 else if (max_download == DownloadJob.DEFINED_MAX) {
346 if (line.lastIndexOf("<<Total number of record(s):") != -1) {
347 String total_ID = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
348 progress.setTotalDownload((Integer.valueOf(total_ID)).intValue());
349 progress.resetFileCount();
350 }
351 else if (line.lastIndexOf("<<Done>>") != -1) {
352 progress.increaseFileCount();
353 }
354 else if(line.lastIndexOf("<<Done:") != -1) {
355 String completed_amount = line.substring(line.indexOf(":") + 1, line.indexOf(">"));
356 progress.increaseFileCount((Integer.valueOf(completed_amount)).intValue());
357 }
358
359 DebugStream.println(line);
360 download_log.appendLine(line);
361 }
362 else {
363 System.out.println("Error!!");
364 System.exit(-1);
365 }
366 }
367 if(state == STOPPED) {
368 isr.close();
369 prcs.destroy(); // This doesn't always work, but it's worth a try
370 }
371
372 }
373 catch (Exception ioe) {
374 //message(Utility.ERROR, ioe.toString());
375 //JTest
376 DebugStream.printStackTrace(ioe);
377 }
378 // If we've got to here and the state isn't STOPPED then the
379 // job is complete.
380 if(state == DownloadJob.RUNNING) {
381 progress.mirrorComplete();
382 previous_state = state;
383 state = DownloadJob.COMPLETE;
384
385 }
386 // refresh the workspace tree
387 Gatherer.g_man.refreshWorkspaceTree(WorkspaceTree.DOWNLOADED_FILES_CHANGED);
388
389 }
390
391
392 /** Called by the WGet native code when the current download is
393 * completed. In turn all download listeners are informed.
394 */
395 public void downloadComplete() {
396 progress.downloadComplete();
397 }
398
399
400 public void downloadComplete(String current_file_downloading)
401 {
402 progress.downloadComplete();
403 DebugStream.println("Download complete: " + current_file_downloading);
404 }
405
406
407 /** Called by the WGet native code when the requested download returns
408 * a status code other than 200.
409 */
410 public void downloadFailed() {
411 // TODO!!
412 //failed_urls.add(current_url); // It is the current url that failed
413 progress.downloadFailed();
414 //DebugStream.println("Download failed: " + current_url);
415 }
416
417 /**
418 */
419 public void downloadWarning() {
420 progress.downloadWarning();
421 }
422
423
424 /**
425 * @return A String representing the initial urls host (root node
426 * of tree that we are mirroring).
427 */
428 public String getHost() {
429 return url.getHost();
430 }
431
432 public AppendLineOnlyFileDocument getLogDocument() {
433 return download_log;
434 }
435
436 /**
437 * @return Returns the progress bar associated with this job.
438 */
439 public DownloadProgressBar getProgressBar() {
440 return progress;
441 }
442
443 /** Called to discover if the user wanted this thread to run or if
444 * it is paused.
445 * @return An int representing the current DownloadJob state.
446 */
447 public int getState() {
448 return state;
449 }
450
451 /** Returns the current state of the stop flag for this job.
452 * @return A boolean representing whether the user has requested to
453 * stop.
454 */
455 public boolean hasSignalledStop() {
456 if(state == DownloadJob.STOPPED || state == DownloadJob.PAUSED ||
457 state == DownloadJob.COMPLETE) {
458 return true;
459 }
460 return false;
461 }
462
463 public void setState(int state) {
464 previous_state = this.state;
465 this.state = state;
466 }
467
468 /** A convenience call.
469 * @return A String representing the url of the initial url (root node of the mirrored tree).
470 */
471 public String toString() {
472 return download_url;
473 }
474
475 /** Called by the WGet native code to signal the current progress of
476 * downloading.
477 * @param current A long representing the number of bytes that have
478 * been downloaded since the last update.
479 * @param expected A long representing the total number of bytes
480 * expected for this download.
481 */
482 public void updateProgress(long current, long expected) {
483 progress.updateProgress(current, expected);
484 }
485}
Note: See TracBrowser for help on using the repository browser.