source: trunk/gli/src/org/greenstone/gatherer/util/Utility.java@ 10345

Last change on this file since 10345 was 10345, checked in by mdewsnip, 19 years ago

Removed some more crap out of the Utility class.

  • Property svn:keywords set to Author Date Id Revision
File size: 17.2 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.util;
38
39// Don't even think about adding import java.awt.* here!
40// The functions in this class should not use any graphical classes. Put your function somewhere else buster!
41import java.io.*;
42import java.net.*;
43import java.util.*;
44// Don't even think about adding import javax.swing.* here!
45// The functions in this class should not use any graphical classes. Put your function somewhere else buster!
46import org.apache.xerces.parsers.*;
47import org.apache.xerces.dom.DocumentImpl;
48import org.apache.xml.serialize.*;
49import org.greenstone.gatherer.Configuration;
50import org.greenstone.gatherer.DebugStream;
51import org.greenstone.gatherer.Dictionary;
52import org.greenstone.gatherer.shell.GShell;
53// Don't even think about adding import org.greenstone.gatherer.Gatherer in here!
54// The functions in this class should be independent of the Gatherer class. Put your function somewhere else buster!
55import org.w3c.dom.*;
56import org.xml.sax.*;
57
58
59/** To provide a library of common methods, in a static context, for use in the Gatherer.
60 * @author John Thompson, Greenstone Digital Library, University of Waikato
61 * @version 2.3b
62 */
63public class Utility
64{
65 /** The size of the io buffer, in bytes. */
66 static final public int BUFFER_SIZE = 1024;
67 static final public String CFG_CLASSIFY = "classify";
68 static final public String CFG_CLASSIFY_BUTTONNAME = "-buttonname";
69 static final public String CFG_CLASSIFY_HFILE = "-hfile";
70 static final public String CFG_CLASSIFY_METADATA = "-metadata";
71 static final public String CFG_CLASSIFY_SORT = "-sort";
72 static final public String CFG_FORMAT = "format";
73 static final public String COLLECTION_TREE = "Collection";
74 /** Definition of an important directory name, in this case the file the collection configuration is expect to be in. */
75 static final public String CONFIG_FILE = "etc" + File.separator + "collect.cfg";
76 static final public String GLI_ARCHIVE = "GLI.jar";
77 static final public String GLI_EXTENSION = ".col";
78
79 /** Definition of an important directory name, in this case the images directory for the collection. */
80 static final public String IMAGES_DIR = "images" + File.separator;
81 /** Definition of an important directory name, in this case the import directory for the collection. */
82 static final public String IMPORT_DIR = "import" + File.separator;
83 /** Definition of an important directory name, in this case the backup import directory for the collection. */
84 static final public String IMPORT_BAK_DIR = "import.bak" + File.separator;
85 /** Definition of an important directory name, in this case the macros directory for the collection. */
86 static final public String MACROS_DIR = "macros" + File.separator;
87 /** Definition of an important directory name, in this case the location of the expected collection metadata sets.. */
88 static final public String META_DIR = "metadata" + File.separator; // Col. Copy
89 /** Definition of an important zip file, in this case zipped up version of metadata file stored in JAR file */
90 static final public String METADATA_ZIP = "metadata.zip";
91 static final public String COLLECT_ZIP = "collect.zip";
92 /** The default name of the perl executable under unix. */
93 static final public String PERL_EXECUTABLE_UNIX = "perl";
94 /** The default name of the perl executable under windows. */
95 static final public String PERL_EXECUTABLE_WINDOWS = "Perl.exe";
96 /** The name of the Gatherer. */
97 static final public String PROGRAM_NAME = "Greenstone Librarian Interface";
98 /** The current version of the Gatherer. */
99 static final public String PROGRAM_VERSION = "v2.60";
100 static final public String WORKSPACE_TREE = "Workspace";
101
102
103 /**
104 * Delete a file or directory
105 * It turns out that in Java you have to make sure a directory is empty before you delete it (much like unix I suppose),
106 * and so just like unix I'll have to set up a recursive delete function.
107 *
108 * @param file The <strong>File</strong> you want to delete.
109 * @return A <i>boolean</i> which is <i>true</i> if the file specified was successfully deleted, <i>false</i> otherwise.
110 */
111 static public boolean delete(File file)
112 {
113 // Nothing to do if it doesn't exist
114 if (!file.exists()) {
115 return true;
116 }
117
118 // If file is a directory, we have to recursively delete its contents first
119 if (file.isDirectory()) {
120 File files[] = file.listFiles();
121 for (int i = 0; i < files.length; i++) {
122 if (delete(files[i]) == false) {
123 System.err.println("Error: Could not delete folder " + file);
124 return false;
125 }
126 }
127 }
128
129 // Delete file
130 if (file.delete() == false) {
131 System.err.println("Error: Could not delete file " + file);
132 return false;
133 }
134
135 return true;
136 }
137
138
139 static public boolean delete(String filename) {
140 return delete(new File(filename));
141 }
142
143
144 /** Convert a long, detailing the length of a file in bytes, into a nice human readable string using b, kb, Mb and Gb. */
145 static final public String BYTE_SUFFIX = " b";
146 static final public long GIGABYTE = 1024000000l;
147 static final public String GIGABYTE_SUFFIX = " Gb";
148 static final public long KILOBYTE = 1024l;
149 static final public String KILOBYTE_SUFFIX = " kb";
150 static final public long MEGABYTE = 1024000l;
151 static final public String MEGABYTE_SUFFIX = " mb";
152 static final public String formatFileLength(long length) {
153 StringBuffer result = new StringBuffer("");
154 float number = 0f;
155 String suffix = null;
156 // Determine the floating point number and the suffix (radix) used.
157 if(length >= GIGABYTE) {
158 number = (float) length / (float) GIGABYTE;
159 suffix = GIGABYTE_SUFFIX;
160 }
161 else if(length >= MEGABYTE) {
162 number = (float) length / (float) MEGABYTE;
163 suffix = MEGABYTE_SUFFIX;
164 }
165 else if(length >= KILOBYTE) {
166 number = (float) length / (float) KILOBYTE;
167 suffix = KILOBYTE_SUFFIX;
168 }
169 else {
170 // Don't need to do anything fancy if the file is smaller than a kilobyte
171 return length + BYTE_SUFFIX;
172 }
173 // Create the formatted string remembering to round the number to 2.d.p. To do this copy everything in the number string from the start to the first occurance of '.' then copy two more digits. Finally search for and print anything that appears after (and including) the optional 'E' delimter.
174 String number_str = Float.toString(number);
175 char number_char[] = number_str.toCharArray();
176 int pos = 0;
177 // Print the characters up to the '.'
178 while(number_char != null && pos < number_char.length && number_char[pos] != '.') {
179 result.append(number_char[pos]);
180 pos++;
181 }
182 if(pos < number_char.length) {
183 // Print the '.' and at most two characters after it
184 result.append(number_char[pos]);
185 pos++;
186 for(int i = 0; i < 2 && pos < number_char.length; i++, pos++) {
187 result.append(number_char[pos]);
188 }
189 // Search through the remaining string for 'E'
190 while(pos < number_char.length && number_char[pos] != 'E') {
191 pos++;
192 }
193 // If we still have string then we found an E. Copy the remaining string.
194 while(pos < number_char.length) {
195 result.append(number_char[pos]);
196 pos++;
197 }
198 }
199 // Add suffix
200 result.append(suffix);
201 // Done
202 return result.toString();
203 }
204
205 /** This method formats a given string, using HTML markup, so its width does not exceed the given width and its appearance if justified.
206 * @param text The <strong>String</strong> requiring formatting.
207 * @param width The maximum width per line as an <i>int</i>.
208 * @return A <strong>String</strong> formatted so as to have no line longer than the specified width.
209 * TODO Currently HTML formatting tags are simply removed from the text, as the effects of spreading HTML tags over a break are undetermined. To solve this we need to associate tags with a certain text token so if it gets broken on to the next line the tags go with it, or if the tags cover a sequence of words that are broken we need to close then reopen the tags. However all this is a major task and well beyond anything I have time to 'muck-round' on.
210 */
211 static public String formatHTMLWidth(String text, int width) {
212 if(text == null) {
213 return "Error";
214 }
215 HTMLStringTokenizer html = new HTMLStringTokenizer(text);
216 int current_width = 0;
217 int threshold = width / 2;
218 Stack lines = new Stack();
219 String line = "";
220 while(html.hasMoreTokens()) {
221 String token = html.nextToken();
222 while(token != null) {
223 if(html.isTag()) {
224 // Insert smart HTML tag code here.
225 token = null;
226 }
227 else {
228 // If the token is bigger than two thirds width, before we've even started break it down.
229 if(current_width + 1 + token.length() > width && token.length() > threshold) {
230 if(width == current_width) {
231 lines.push(line);
232 line = token;
233 current_width = token.length();
234 }
235 else {
236 String prefix = token.substring(0, width - 1 - current_width);
237 token = token.substring(prefix.length());
238 if(current_width == 0) {
239 line = line + prefix;
240 }
241 else {
242 line = line + " " + prefix;
243 }
244 lines.push(line);
245 line = "";
246 current_width = 0;
247 }
248 }
249 // If adding the next token would push us over the maximum line width.
250 else if(current_width + 1 + token.length() > width) {
251 lines.push(line);
252 line = token;
253 current_width = token.length();
254 token = null;
255 }
256 // Otherwise we should be able to just add the token, give or take.
257 else {
258 if(current_width == 0) {
259 line = line + token;
260 current_width = token.length();
261 }
262 else {
263 // Special case for standard punctuation which may exist after a tag like so:
264 // My name is <scratchy>Slim Shady</scratchy>. <-- Annoying punctuation.
265 if(token.equals(".") || token.equals(",") || token.equals("!") || token.equals("?")) {
266 line = line + token;
267 current_width = current_width + 1;
268 }
269 else {
270 line = line + " " + token;
271 current_width = current_width + 1 + token.length();
272 }
273 }
274 token = null;
275 }
276 }
277 }
278 }
279 String result = line;
280 while(!lines.empty()) {
281 result = (String)lines.pop() + "<BR>" + result;
282 }
283 // Replace ' ' with "&nbsp;"
284 boolean tag = false;
285 int pos = 0;
286 while(pos < result.length()) {
287 if(result.charAt(pos) == '<') {
288 tag = true;
289 }
290 else if(result.charAt(pos) == '>') {
291 tag = false;
292 }
293 else if(result.charAt(pos) == ' ' && !tag) {
294 String prefix = result.substring(0, pos);
295 String suffix = result.substring(pos + 1);
296 result = prefix + "&nbsp;" + suffix;
297 }
298 pos++;
299 }
300 result = "<HTML>" + result + "</HTML>";
301 return result;
302 }
303
304
305 static public String getDateString() {
306 Calendar current = Calendar.getInstance();
307 String day_name = null;
308 switch(current.get(Calendar.DAY_OF_WEEK)) {
309 case Calendar.MONDAY: day_name = "Dates.Mon"; break;
310 case Calendar.TUESDAY: day_name = "Dates.Tue"; break;
311 case Calendar.WEDNESDAY: day_name = "Dates.Wed"; break;
312 case Calendar.THURSDAY: day_name = "Dates.Thu"; break;
313 case Calendar.FRIDAY: day_name = "Dates.Fri"; break;
314 case Calendar.SATURDAY: day_name = "Dates.Sat"; break;
315 case Calendar.SUNDAY: day_name = "Dates.Sun"; break;
316 default: day_name = "";
317 }
318 String month_name = null;
319 switch(current.get(Calendar.MONTH)) {
320 case Calendar.JANUARY: month_name = "Dates.Jan"; break;
321 case Calendar.FEBRUARY: month_name = "Dates.Feb"; break;
322 case Calendar.MARCH: month_name = "Dates.Mar"; break;
323 case Calendar.APRIL: month_name = "Dates.Apr"; break;
324 case Calendar.MAY: month_name = "Dates.May"; break;
325 case Calendar.JUNE: month_name = "Dates.Jun"; break;
326 case Calendar.JULY: month_name = "Dates.Jul"; break;
327 case Calendar.AUGUST: month_name = "Dates.Aug"; break;
328 case Calendar.SEPTEMBER: month_name = "Dates.Sep"; break;
329 case Calendar.OCTOBER: month_name = "Dates.Oct"; break;
330 case Calendar.NOVEMBER: month_name = "Dates.Nov"; break;
331 case Calendar.DECEMBER: month_name = "Dates.Dec"; break;
332 default: month_name = "";
333 }
334 int day = current.get(Calendar.DAY_OF_MONTH);
335 int hour = current.get(Calendar.HOUR_OF_DAY);
336 int minute = current.get(Calendar.MINUTE);
337 int second = current.get(Calendar.SECOND);
338 int year = current.get(Calendar.YEAR);
339
340 return Dictionary.get(day_name) + " " + Dictionary.get(month_name) + " " + day + " " + year + " " + Utility.pad(String.valueOf(hour), 2, '0', true) + ":" + Utility.pad(String.valueOf(minute), 2, '0', true) + ":" + Utility.pad(String.valueOf(second), 2, '0', true);
341 }
342
343
344 /** Determine this machines name.
345 * @return The name as a <strong>String</strong>.
346 */
347 static public String getMachineName() {
348 try {
349 return InetAddress.getLocalHost().getHostName();
350 }
351 catch(UnknownHostException ex) {
352 }
353 return "Unknown Machine";
354 }
355
356
357 static public String getSitesDir(String gsdl3_path) {
358 return gsdl3_path + "sites" + File.separator;
359
360 }
361
362
363 /** returns the path to the greenstone version of wget */
364 static public String getWGetPath(String gsdl_path)
365 {
366 if (isWindows()) {
367 return gsdl_path + "bin" + File.separator + "windows" + File.separator + "wget.exe";
368 }
369 else if (isMac()) {
370 return gsdl_path + "bin" + File.separator + "darwin" + File.separator + "wget";
371 }
372 else {
373 return gsdl_path + "bin" + File.separator + "linux" + File.separator + "wget";
374 }
375 }
376
377
378 /** Method to determine if the host system is MacOS based.
379 * @return a boolean which is true if the platform is MacOS, false otherwise
380 */
381 public static boolean isMac() {
382 Properties props = System.getProperties();
383 String os_name = props.getProperty("os.name","");
384 if(os_name.startsWith("Mac OS")) {
385 return true;
386 }
387 return false;
388 }
389
390
391 /** Method to determine if the host system is Microsoft Windows based.
392 * @return A <i>boolean</i> which is <i>true</i> if the platform is Windows, <i>false</i> otherwise.
393 */
394 public static boolean isWindows() {
395 Properties props = System.getProperties();
396 String os_name = props.getProperty("os.name","");
397 if(os_name.startsWith("Windows")) {
398 return true;
399 }
400 return false;
401 }
402
403 public static boolean isWindows9x() {
404 Properties props = System.getProperties();
405 String os_name = props.getProperty("os.name","");
406 if(os_name.startsWith("Windows") && os_name.indexOf("9") != -1) {
407 return true;
408 }
409 return false;
410 }
411 /** Takes a string and a desired length and pads out the string to the length by adding spaces to the left.
412 * @param str The target <strong>String</strong> that needs to be padded.
413 * @param length The desired length of the string as an <i>int</i>.
414 * @return A <strong>String</strong> made from appending space characters with the string until it has a length equal to length.
415 */
416 static private String pad(String str_raw, int length, char fill, boolean end) {
417 StringBuffer str = new StringBuffer(str_raw);
418 while(str.length() < length) {
419 if(end) {
420 str.insert(0, fill);
421 }
422 else {
423 str.append(fill);
424 }
425 }
426 return str.toString();
427 }
428
429
430 static public StringBuffer readXMLStream(InputStream input_stream)
431 {
432 StringBuffer xml = new StringBuffer("");
433
434 try {
435 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
436 BufferedReader buffered_in = new BufferedReader(isr);
437
438 String line = "";
439 boolean xml_content = false;
440 while((line = buffered_in.readLine()) != null) {
441 if(xml_content) {
442 xml.append(line);
443 xml.append("\n");
444 }
445 else if(line.trim().startsWith("<?xml")) {
446 xml_content = true;
447 xml.append(line);
448 xml.append("\n");
449 }
450 }
451 buffered_in = null;
452 }
453 catch (Exception error) {
454 System.err.println("Failed when trying to parse XML stream");
455 error.printStackTrace();
456 }
457
458 return xml;
459 }
460
461
462 /** I think this works a bit better on Unicode strings. */
463 static public String stripNL(String raw_string)
464 {
465 String stripped_string = new String();
466 for (int i = 0; i < raw_string.length(); i++) {
467 char raw_character = raw_string.charAt(i);
468 if (raw_character != '\n' && raw_character != '\t') {
469 stripped_string = stripped_string + raw_character;
470 }
471 }
472 return stripped_string;
473 }
474}
Note: See TracBrowser for help on using the repository browser.