source: trunk/greenstone3-extensions/gs3build/src/org/greenstone/gsdl3/gs3build/util/URLString.java@ 12188

Last change on this file since 12188 was 12188, checked in by kjdon, 18 years ago

Initial revision

  • Property svn:keywords set to Author Date Id Revision
File size: 12.8 KB
Line 
1package org.greenstone.gsdl3.gs3build.util;
2
3import java.applet.*;
4import java.net.*;
5
/**
 * A small URL-as-text helper: holds one URL string and offers static
 * utilities for resolving, tidying and dissecting URL strings without
 * going through <code>java.net.URL</code>.
 *
 * The held URL may be <code>null</code>; the two-argument constructor uses
 * <code>null</code> to mark a URL that was rejected (see {@link #isNull()}).
 */
public class URLString
{
  /** The URL text; <code>null</code> when the URL was rejected. */
  private String url;

  /**
   * Find the colon that terminates a leading scheme name.
   *
   * Every character before the first ':' must be a letter, a digit, '+',
   * '-' or '.'; otherwise the text is not considered to carry a scheme.
   *
   * @param text the text to inspect
   * @return the index of the ':' ending the scheme, or -1 if there is none
   */
  private static int schemeEnd(String text)
  {
    int colon = text.indexOf(':');
    for (int i = 0; i < colon; i++)
    {
      char ch = text.charAt(i);
      if (ch != '+' && ch != '-' && ch != '.' && !Character.isLetterOrDigit(ch))
      {
        return -1;
      }
    }
    return colon;
  }

  /**
   * Give the server root of a url: everything up to and including the first
   * '/' that follows the host, e.g. "http://host/" for "http://host/a/b.html".
   * When the url has no '/' after the host, one is appended so that the
   * host is never lost.
   *
   * @param url the url to cut down
   * @return the root part, always ending in '/'
   */
  private static String urlbase(String url)
  {
    int marker = url.indexOf("//");
    int hostStart = (marker < 0) ? 0 : marker + 2;
    int slash = url.indexOf('/', hostStart);

    if (slash < 0)
    {
      return url + "/";
    }
    return url.substring(0, slash + 1);
  }

  /**
   * Give the text before the last '/' of a name.
   *
   * @param name the name to cut down
   * @return the part before the last '/', or "" when there is no '/'
   */
  private static String parentDir(String name)
  {
    int slash = name.lastIndexOf('/');
    return (slash < 0) ? "" : name.substring(0, slash);
  }

  /**
   * Find where the leaf (final path component) of a url starts.
   *
   * @param url the url to inspect
   * @return index of the first character of the leaf, or -1 when the url has
   *         no '/', ends in '/', or its last '/' is part of the "//" that
   *         introduces the host (i.e. the url has no path at all)
   */
  private static int leafStart(String url)
  {
    int slash = url.lastIndexOf('/');
    if (slash < 0 || slash == url.length() - 1)
    {
      return -1;
    }

    int marker = url.indexOf("//");
    if (marker >= 0 && slash <= marker + 1)
    {
      return -1;
    }
    return slash + 1;
  }

  /**
   * Wrap the given url text as-is.
   *
   * @param s the url text (may be <code>null</code>)
   */
  public URLString(String s)
  {
    this.url = s;
  }

  /**
   * Reconcile a link target found in a document against that document's url.
   *
   * <ul>
   * <li>Any "#fragment" is dropped; a target that is <code>null</code>, empty
   *     or only a fragment yields the base url itself.</li>
   * <li>A target with a scheme ("http://...") is taken as absolute.</li>
   * <li>A target starting with "//" is given the "http:" scheme.</li>
   * <li>A target starting with "/" is appended to the server root of the base.</li>
   * <li>Anything else is resolved against the directory of the base; leading
   *     "../" and "./" steps are applied, never climbing above the server root.</li>
   * </ul>
   * A result containing '?' is treated as a script call and abandoned: the
   * held url becomes <code>null</code>.
   *
   * @param s      the url of the containing document; must not be
   *               <code>null</code> when the target is relative
   * @param target the link target as written in the document
   */
  public URLString(String s, String target)
  {
    // trim fragment identifiers
    target = noRef(target);
    if (target == null || target.length() == 0)
    {
      this.url = s;
      return;
    }

    int colon = schemeEnd(target);
    if (colon >= 0)
    {
      // absolute reference; "scheme://host" gets its missing trailing '/'
      if (target.indexOf("//") == colon + 1 && target.lastIndexOf('/') == colon + 2)
      {
        target = target + "/";
      }
      this.url = target;
    }
    else if (target.startsWith("//"))
    {
      // absolute with implicit scheme (http); also add a missing trailing '/'
      if (target.lastIndexOf('/') == 1)
      {
        target = target + "/";
      }
      this.url = "http:" + target;
    }
    else if (target.charAt(0) == '/')
    {
      // root-relative: hang the target off the server root of the base
      this.url = urlbase(s) + target.substring(1);
      // NOTE(review): this branch has always returned before the script check
      // below, so root-relative targets containing '?' are kept. Behaviour
      // preserved; confirm whether that is intended.
      return;
    }
    else
    {
      // relative to the directory of the base document
      String root = (s.indexOf("//") >= 0) ? urlbase(s) : "";
      String dir = s.substring(0, s.lastIndexOf('/') + 1);
      if (dir.length() < root.length())
      {
        // base had no path at all ("http://host"): start from the root
        dir = root;
      }

      boolean stepping = true;
      while (stepping)
      {
        if (target.equals("..") || target.startsWith("../"))
        {
          // move up a level, but never above the server root
          if (dir.length() > root.length())
          {
            dir = dir.substring(0, dir.lastIndexOf('/', dir.length() - 2) + 1);
          }
          target = (target.length() > 3) ? target.substring(3) : "";
        }
        else if (target.equals(".") || target.startsWith("./"))
        {
          // "current directory" steps carry no information
          target = (target.length() > 2) ? target.substring(2) : "";
        }
        else
        {
          stepping = false;
        }
      }
      this.url = dir + target;
    }

    // abandon the url if it is a call to a script
    if (this.url.indexOf('?') >= 0)
    {
      this.url = null;
    }
  }

  /**
   * Test whether <code>parent</code> is a strict ancestor directory of
   * <code>child</code>, by repeatedly cutting the last "/segment" off
   * <code>child</code> and comparing.
   *
   * @param parent the candidate ancestor (no trailing '/')
   * @param child  the candidate descendant
   * @return <code>true</code> if some proper prefix of <code>child</code>
   *         ending at a '/' boundary equals <code>parent</code>
   */
  public static boolean dirParent(String parent, String child)
  {
    while (child.length() >= parent.length())
    {
      int slash = child.lastIndexOf('/');
      if (slash < 0)
      {
        // nothing left to strip, so parent cannot be an ancestor
        return false;
      }
      child = child.substring(0, slash);

      if (parent.equals(child))
      {
        return true;
      }
    }
    return false;
  }

  /**
   * Compare the directories of two urls.
   *
   * @param from the first url
   * @param to   the second url
   * @return 0 if both lie in the same directory, 1 if the directory of
   *         <code>from</code> is an ancestor of the directory of
   *         <code>to</code>, -1 for the reverse, and -2 if neither holds
   */
  public static int dirPos(String from, String to)
  {
    String fromdir = parentDir(from);
    String todir = parentDir(to);

    if (fromdir.equals(todir))
    {
      return 0;
    }
    if (fromdir.length() > todir.length())
    {
      if (dirParent(todir, fromdir))
      {
        return -1;
      }
    }
    else if (dirParent(fromdir, todir))
    {
      return 1;
    }
    return -2;
  }

  /**
   * Count how many leading directory levels the paths of two urls share.
   *
   * @param a the first url
   * @param b the second url
   * @return the number of identical leading directory names; 0 if either
   *         url has no directory path (see {@link #path(String)})
   */
  public static int commonLevels(String a, String b)
  {
    String restA = path(a);
    String restB = path(b);
    int common = 0;

    while (restA != null && restB != null)
    {
      int cutA = restA.indexOf('/');
      int cutB = restB.indexOf('/');

      // head = next directory name, rest = what follows it (null when done)
      String headA = (cutA > 0) ? restA.substring(0, cutA) : restA;
      String headB = (cutB > 0) ? restB.substring(0, cutB) : restB;
      restA = (cutA > 0 && restA.length() > cutA + 1) ? restA.substring(cutA + 1) : null;
      restB = (cutB > 0 && restB.length() > cutB + 1) ? restB.substring(cutB + 1) : null;

      if (!headA.equals(headB))
      {
        return common;
      }
      common++;
    }
    return common;
  }

  /**
   * Give the name of the directory at a given depth of a url's path.
   *
   * @param url   the url to inspect
   * @param level the 1-based depth wanted (1 is the topmost directory)
   * @return the directory name, or <code>null</code> if the url has no
   *         directory path or is not that deep
   */
  public static String dirLevelName(String url, int level)
  {
    String rest = path(url);
    if (rest == null)
    {
      return null;
    }

    int depth = 0;
    while (true)
    {
      int cut = rest.indexOf('/');
      String dir = (cut >= 0) ? rest.substring(0, cut) : rest;

      depth++;
      if (depth == level)
      {
        return dir;
      }
      if (cut < 0)
      {
        return null;
      }
      rest = rest.substring(cut + 1);
    }
  }

  /**
   * Count the directory levels in a url's path.
   *
   * @param url the url to inspect
   * @return the number of directories, or 0 if the url has no directory
   *         path (see {@link #path(String)})
   */
  public static int dirLevel(String url)
  {
    String lpath = path(url);
    if (lpath == null)
    {
      return 0;
    }

    // n separators delimit n + 1 directory names
    int level = 1;
    for (int at = lpath.indexOf('/'); at >= 0; at = lpath.indexOf('/', at + 1))
    {
      level++;
    }
    return level;
  }

  /**
   * Normalise a url string: lower-case the scheme, supply "http://" when no
   * scheme is given, and append the trailing '/' to a bare
   * "scheme://host" url.
   *
   * @param urlString    the url to tidy
   * @param permitScript currently unused; kept for interface compatibility
   * @return the tidied url, or <code>null</code> if <code>urlString</code>
   *         is <code>null</code>
   */
  public static String tidyURL(String urlString, boolean permitScript)
  {
    if (urlString == null)
    {
      return null;
    }

    int colon = schemeEnd(urlString);
    String s;
    if (colon >= 0)
    {
      // fixed locale: scheme case-folding must not depend on the default locale
      s = urlString.substring(0, colon).toLowerCase(java.util.Locale.ENGLISH)
          + urlString.substring(colon);
      // only hierarchical "scheme://host" urls get the trailing '/'
      if (s.startsWith("//", colon + 1) && s.lastIndexOf('/') == colon + 2)
      {
        s = s + "/";
      }
    }
    else
    {
      s = urlString.startsWith("//") ? "http:" + urlString : "http://" + urlString;
      // index 6 is the second '/' of "http://": no path was given
      if (s.lastIndexOf('/') == 6)
      {
        s = s + "/";
      }
    }
    return s;
  }

  /**
   * Normalise a url string; see {@link #tidyURL(String, boolean)}.
   *
   * @param urlString the url to tidy
   * @return the tidied url
   */
  public static String tidyURL(String urlString)
  {
    return tidyURL(urlString, false);
  }

  /**
   * Tidy a url string and convert it to a <code>java.net.URL</code>.
   *
   * @param urlString the url text
   * @return the URL, or <code>null</code> if the text is <code>null</code>
   *         or cannot be parsed
   */
  public static URL toURL(String urlString)
  {
    if (urlString == null)
    {
      return null;
    }

    try
    {
      return new URL(URLString.tidyURL(urlString));
    }
    catch (MalformedURLException ex)
    {
      // by contract an unparseable url is reported as null, not as an error
      return null;
    }
  }

  /**
   * Tidy the held url in place; a null (rejected) url is left untouched.
   */
  public void tidy()
  {
    if (this.url != null)
    {
      this.url = tidyURL(this.url);
    }
  }

  /**
   * @return <code>true</code> if no url is held (e.g. it was rejected as a
   *         script call by the two-argument constructor)
   */
  public boolean isNull()
  {
    return this.url == null;
  }

  /**
   * Test whether a url lies outside a home site.
   *
   * @param home the url prefix that identifies the home site
   * @param url  the url to test
   * @return <code>false</code> if the text of <code>url</code> starts with
   *         <code>home</code>, <code>true</code> otherwise
   */
  public static boolean isForeign(String home, URL url)
  {
    return !url.toString().startsWith(home);
  }

  /**
   * Remove any trailing "#localanchor" local reference from a URL string
   *
   * @param target the original url
   * @return the cleaned url, sans local reference; <code>null</code> if the
   *         target is <code>null</code> or consists only of a local reference
   */
  public static String noRef(String target)
  {
    if (target == null)
    {
      return null;
    }

    int hash = target.indexOf('#');
    if (hash < 0)
    {
      return target;
    }
    if (hash == 0)
    {
      return null;
    }
    return target.substring(0, hash);
  }

  /**
   * Obtain the trailing local anchor reference from a URL,
   * if it exists.
   * e.g. for http://www.test.mdx.ac.uk/example/index.html#credits the
   * response would be "credits"
   *
   * @param target the original url.
   * @return the local reference. <code>null</code> is returned if no local
   *         reference exists or the target is <code>null</code>.
   */
  public static String subRef(String target)
  {
    if (target == null)
    {
      return null;
    }

    int hash = target.indexOf('#');
    return (hash < 0) ? null : target.substring(hash + 1);
  }

  /**
   * Clean a URL of the form GET parameters.
   *
   * @param url the original url, with GET message parameters
   * @return the url up to, but not including, the first '?';
   *         <code>null</code> if the url is <code>null</code>
   */
  public static String noParameters(String url)
  {
    if (url == null)
    {
      return null;
    }

    int query = url.indexOf('?');
    return (query < 0) ? url : url.substring(0, query);
  }

  /**
   * Obtain the form parameters from a url
   *
   * @param url the original url, with GET message parameters
   * @return the form "GET" parameters. The leading '?' is preserved in the
   *         reply. If no parameters were given (or the url is
   *         <code>null</code>), the result is <code>null</code>.
   */
  public static String formParameters(String url)
  {
    if (url == null)
    {
      return null;
    }

    int query = url.indexOf('?');
    return (query < 0) ? null : url.substring(query);
  }

  /**
   * Flatten a URL, making the '?' and '&amp;' items into underscores. When
   * anything was replaced and the url contains ".html", the first ".html"
   * is moved to the very end so the flattened name keeps that extension.
   * e.g. "a.html?x=1&amp;y=2" becomes "a_x=1_y=2.html"
   *
   * @param oldUrl the original url, with GET message parameters
   * @return the "flattened" url
   */
  public static String flattenedUrl(String oldUrl)
  {
    String flat = oldUrl.replace('?', '_').replace('&', '_');
    if (flat.equals(oldUrl))
    {
      // nothing replaced: leave the url alone
      return flat;
    }

    int html = oldUrl.indexOf(".html");
    if (html < 0)
    {
      return flat;
    }
    // replacements are one-for-one, so indexes in oldUrl are valid in flat
    return flat.substring(0, html) + flat.substring(html + 5) + ".html";
  }

  /**
   * Provide the complete pathname in the given url
   * eg. "example/index.html" for http://www.test.mdx.ac.uk/example/index.html
   *
   * @param url url of page
   * @return everything after the '/' that follows the host;
   *         <code>null</code> if the url is <code>null</code>, has no "//",
   *         or has no '/' after the host
   */
  public static String pathName(String url)
  {
    if (url == null)
    {
      return null;
    }

    int marker = url.indexOf("//");
    if (marker < 0)
    {
      return null;
    }

    int slash = url.indexOf('/', marker + 2);
    if (slash < 0)
    {
      return null;
    }
    return url.substring(slash + 1);
  }

  /**
   * Provide the leaf filename in the given url
   * eg. "index.html" for http://www.test.mdx.ac.uk/example/index.html
   *
   * @param url url of page
   * @return leaf file name (everything after the last '/');
   *         <code>null</code> if the url is <code>null</code>, has no '/',
   *         ends in '/', or consists of a host without any path
   */
  public static String leafName(String url)
  {
    if (url == null)
    {
      return null;
    }

    int start = leafStart(url);
    return (start < 0) ? null : url.substring(start);
  }

  /**
   * Provide the scheme-and-host prefix of the given url
   * eg. "http://www.test.mdx.ac.uk/" for http://www.test.mdx.ac.uk/example/index.html
   *
   * @param url url of page
   * @return everything up to and including the first '/' after the host
   *         (the whole url when there is no such '/');
   *         <code>null</code> if the url contains no "//"
   */
  public static String host(String url)
  {
    int marker = url.indexOf("//");
    if (marker < 0)
    {
      return null;
    }

    int slash = url.indexOf('/', marker + 2);
    return (slash < 0) ? url : url.substring(0, slash + 1);
  }

  /**
   * Return the logical path of the url on the remote server
   * eg. "example" for http://www.test.mdx.ac.uk/example/index.html
   *
   * @param url url of page
   * @return the directories between the host and the leaf, without leading
   *         or trailing '/'; <code>null</code> if the url is
   *         <code>null</code>, has no "//", or has no directory after the host
   */
  public static String path(String url)
  {
    String tail = pathName(url);
    if (tail == null || tail.length() == 0)
    {
      return null;
    }

    int slash = tail.lastIndexOf('/');
    return (slash < 0) ? null : tail.substring(0, slash);
  }

  /**
   * Give the 'dot' extension of a url, including the dot;
   * eg. ".html" for http://www.test.mdx.ac.uk/example/index.html
   * Any "?parameters" are ignored.
   *
   * @param url url
   * @return the extension of the leaf name, starting at its last '.';
   *         <code>null</code> if the url is <code>null</code>, has no leaf
   *         name, or the leaf name contains no '.'
   */
  public static String extension(String url)
  {
    if (url == null)
    {
      return null;
    }

    // cut the query first: it may itself contain '/' and '.'
    int query = url.indexOf('?');
    String bare = (query < 0) ? url : url.substring(0, query);

    int start = leafStart(bare);
    if (start < 0)
    {
      return null;
    }

    int dot = bare.lastIndexOf('.');
    return (dot < start) ? null : bare.substring(dot);
  }

  /**
   * Give the 'dot' extension of a url; see {@link #extension(String)}.
   *
   * @param url url
   * @return the extension, or <code>null</code> if there is none
   */
  public static String extension(URL url)
  {
    return extension(url.toString());
  }

  /**
   * @return the held url text, which may be <code>null</code>
   */
  public String toString()
  {
    return this.url;
  }
}
Note: See TracBrowser for help on using the repository browser.