source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/util/URLString.java@ 5823

Last change on this file since 5823 was 5800, checked in by cs025, 21 years ago

Adding gs3build

  • Property svn:keywords set to Author Date Id Revision
File size: 13.4 KB
Line 
1package org.greenstone.gsdl3.gs3build.util;
2
3
4
5import java.applet.*;
6
7import java.net.*;
8
9
10
/**
 * A URL held as a plain string, plus a collection of static helpers for
 * picking URL strings apart (host, path, leaf name, extension, parameters,
 * local anchors) and for comparing the directory structure of two URLs.
 *
 * The wrapped string may be <code>null</code>: the two-argument constructor
 * deliberately abandons references that carry a '?' (calls to scripts).
 * Use {@link #isNull()} to test for that case.
 */
public class URLString
{
  /** The url text; <code>null</code> when the url has been abandoned. */
  private String url;

  /**
   * Wrap an existing url string without any modification.
   *
   * @param s the url text (may be <code>null</code>)
   */
  public URLString(String s)
  { this.url = s;
  }

  /**
   * Reconcile a reference found in a document against the url of that
   * document.
   *
   * <ul>
   * <li>Any "#anchor" part of <code>target</code> is discarded; a reference
   *     that is only an anchor, is empty, or is <code>null</code> yields
   *     <code>s</code> unchanged.</li>
   * <li>A target with a scheme ("http://...", "mailto:...") is used as it
   *     stands, with a trailing '/' added when it names only a host.</li>
   * <li>A target starting with "//" is given the "http:" scheme.</li>
   * <li>A target starting with '/' is appended to the scheme and host of
   *     <code>s</code>.</li>
   * <li>Anything else is resolved against the directory of <code>s</code>,
   *     with leading "../" segments climbing towards (never above) the
   *     root of the host.</li>
   * </ul>
   *
   * Except for the '/'-rooted case, a result containing '?' is abandoned and
   * the url becomes <code>null</code>.
   *
   * @param s      the url of the referring document; must not be
   *               <code>null</code> when <code>target</code> is relative
   * @param target the reference to reconcile
   */
  public URLString(String s, String target)
  { // trim fragment identifiers
    target = noRef(target);
    if (target == null || target.length() == 0)
    { // only an anchor (or nothing at all): the reference is the base itself
      this.url = s;
      return;
    }

    int colon = target.indexOf(':');
    if (colon >= 0 && isSchemePrefix(target, colon))
    { // absolute reference; add the root '/' when only "scheme://host" is given
      if (target.startsWith("//", colon + 1) &&
          target.lastIndexOf('/') == colon + 2)
      { target = target + "/";
      }
      this.url = target;
    }
    else if (target.startsWith("//"))
    { // absolute - implicit scheme (ie http); also check for no trailing '/'
      if (target.lastIndexOf('/') == 1)
      { target = target + "/";
      }
      this.url = "http:" + target;
    }
    else if (target.charAt(0) == '/')
    { // a leading '/' is relative to the root of the base url's host
      this.url = urlbase(s) + target.substring(1);
      // NOTE(review): this branch has always returned before the script
      // check below, so "/cgi?x=1" is kept while "cgi?x=1" is abandoned.
      // Kept as-is for existing callers - confirm whether it is intended.
      return;
    }
    else
    { this.url = resolveRelative(s, target);
    }

    // abandon the url if it is a call to a script
    if (this.url.indexOf('?') >= 0)
    { this.url = null;
    }
  }

  /**
   * Test whether the first <code>end</code> characters of <code>s</code>
   * could be a url scheme: letters, digits, '+', '-' and '.' only.
   */
  private static boolean isSchemePrefix(String s, int end)
  { for (int i = 0; i < end; i ++)
    { char ch = s.charAt(i);
      if (ch != '+' && ch != '-' && ch != '.' &&
          !Character.isLetterOrDigit(ch))
      { return false;
      }
    }
    return true;
  }

  /**
   * Give the scheme and host of a url, always ending in '/';
   * eg. "http://host/" for both "http://host/a/b.html" and "http://host".
   */
  private static String urlbase(String url)
  { int authority = url.indexOf("//");
    int hostStart = (authority < 0) ? 0 : authority + 2;
    int slash = url.indexOf('/', hostStart);

    if (slash < 0)
    { // nothing after the host: keep the host rather than dropping it
      return url + "/";
    }
    return url.substring(0, slash + 1);
  }

  /**
   * Resolve a relative reference (one with no scheme and no leading '/')
   * against the directory of a base url.
   */
  private static String resolveRelative(String base, String target)
  { // 'floor' is the '/' that starts the path; ".." must never cut at or
    // before it, otherwise the host itself would be lost
    int floor = -1;
    int authority = base.indexOf("//");
    if (authority >= 0)
    { floor = base.indexOf('/', authority + 2);
      if (floor < 0)
      { base = base + "/";
        floor = base.length() - 1;
      }
    }

    // move up a level on the left for every leading ".." segment
    while (target.equals("..") || target.startsWith("../"))
    { int cut = base.lastIndexOf('/');
      if (cut > floor)
      { base = base.substring(0, cut);
      }
      target = (target.length() > 3) ? target.substring(3) : "";
    }

    // strip trailing leaf from the base, then append the target
    base = base.substring(0, base.lastIndexOf('/') + 1);
    return base + target;
  }

  /**
   * Give the text before the last '/' of a url, or "" if it has no '/'.
   */
  private static String directoryOf(String url)
  { int slash = url.lastIndexOf('/');
    return (slash < 0) ? "" : url.substring(0, slash);
  }

  /**
   * Test whether <code>parent</code> is a strict ancestor directory of
   * <code>child</code>, by repeatedly cutting <code>child</code> at its
   * last '/'.
   *
   * @param parent the candidate ancestor, without a trailing '/'
   * @param child  the url or directory to test
   * @return <code>true</code> if some truncation of <code>child</code>
   *         equals <code>parent</code>
   */
  public static boolean dirParent(String parent, String child)
  { while (child.length() >= parent.length())
    { int slash = child.lastIndexOf('/');
      if (slash < 0)
      { // nothing left to strip, so no ancestor can match
        return false;
      }
      child = child.substring(0, slash);

      if (parent.equals(child))
      { return true;
      }
    }
    return false;
  }

  /**
   * Compare the directories of two urls.
   *
   * @param from the first url
   * @param to   the second url
   * @return 0 if both lie in the same directory, -1 if the directory of
   *         <code>to</code> is an ancestor of that of <code>from</code>,
   *         1 if the directory of <code>from</code> is an ancestor of that
   *         of <code>to</code>, and -2 otherwise
   */
  public static int dirPos(String from, String to)
  { String fromdir = directoryOf(from);
    String todir = directoryOf(to);

    if (fromdir.equals(todir))
    { return 0;
    }
    if (fromdir.length() > todir.length())
    { if (dirParent(todir, fromdir))
      { return -1;
      }
    }
    else if (dirParent(fromdir, todir))
    { return 1;
    }
    return -2;
  }

  /**
   * Count how many leading directory levels the paths (see
   * {@link #path(String)}) of two urls have in common.
   *
   * @param a the first url
   * @param b the second url
   * @return the number of identical leading directories; 0 if either url
   *         has no path
   */
  public static int commonLevels(String a, String b)
  { String restA = path(a);
    String restB = path(b);
    int common = 0;

    while (restA != null && restB != null)
    { int cutA = restA.indexOf('/');
      int cutB = restB.indexOf('/');
      String dirA = (cutA > 0) ? restA.substring(0, cutA) : restA;
      String dirB = (cutB > 0) ? restB.substring(0, cutB) : restB;

      if (!dirA.equals(dirB))
      { return common;
      }
      common ++;

      restA = afterSegment(restA, cutA);
      restB = afterSegment(restB, cutB);
    }
    return common;
  }

  /**
   * Give what follows the '/' at <code>cut</code>, or <code>null</code>
   * when there is no such '/' or nothing follows it.
   */
  private static String afterSegment(String path, int cut)
  { if (cut > 0 && path.length() > cut + 1)
    { return path.substring(cut + 1);
    }
    return null;
  }

  /**
   * Give the name of one directory level of a url's path;
   * eg. level 2 of http://host/a/b/c.html is "b".
   *
   * @param url   the url
   * @param level the level wanted, counting from 1
   * @return the directory name, or <code>null</code> if the url has no
   *         path or has fewer levels than requested
   */
  public static String dirLevelName(String url, int level)
  { String rest = path(url);
    if (rest == null)
    { return null;
    }

    int thislevel = 0;
    while (true)
    { int slash = rest.indexOf('/');
      String dir = (slash >= 0) ? rest.substring(0, slash) : rest;

      thislevel ++;
      if (thislevel == level)
      { return dir;
      }
      if (slash < 0)
      { return null;
      }
      rest = rest.substring(slash + 1);
    }
  }

  /**
   * Count the directory levels in a url's path;
   * eg. 2 for http://host/a/b/c.html.
   *
   * @param url the url
   * @return the number of levels, or 0 if the url has no path
   */
  public static int dirLevel(String url)
  { String lpath = path(url);
    if (lpath == null)
    { return 0;
    }

    // one level more than there are separators
    int level = 1;
    for (int i = lpath.indexOf('/'); i >= 0; i = lpath.indexOf('/', i + 1))
    { level ++;
    }
    return level;
  }

  /**
   * Normalise a url string: lower-case the scheme, supply "http://" (or
   * "http:" before a leading "//") when no scheme is given, and add the
   * root '/' when the url names only a host.
   *
   * @param urlString    the url to tidy
   * @param permitScript currently ignored
   * @return the tidied url, or <code>null</code> if <code>urlString</code>
   *         is <code>null</code>
   */
  public static String tidyURL(String urlString, boolean permitScript)
  { if (urlString == null)
    { return null;
    }

    String s = urlString;
    int colon = s.indexOf(':');

    if (colon >= 0 && isSchemePrefix(s, colon))
    { // fixed locale: the default one may map ASCII letters unexpectedly
      s = s.substring(0, colon).toLowerCase(java.util.Locale.ENGLISH)
          + s.substring(colon);
      // only "scheme://host" needs the root '/'; "x:a/" must be left alone
      if (s.startsWith("//", colon + 1) && s.lastIndexOf('/') == colon + 2)
      { s = s + "/";
      }
    }
    else
    { if (s.startsWith("//"))
      { s = "http:" + s;
      }
      else
      { s = "http://" + s;
      }
      // index 6 is the second '/' of "http://": no path was given
      if (s.lastIndexOf('/') == 6)
      { s = s + "/";
      }
    }
    return s;
  }

  /**
   * Normalise a url string; see {@link #tidyURL(String, boolean)}.
   *
   * @param urlString the url to tidy
   * @return the tidied url
   */
  public static String tidyURL(String urlString)
  { return tidyURL(urlString, false);
  }

  /**
   * Tidy a url string and convert it to a <code>URL</code>.
   *
   * @param urlString the url text
   * @return the URL, or <code>null</code> if the text is <code>null</code>
   *         or cannot be parsed
   */
  public static URL toURL(String urlString)
  { if (urlString == null)
    { return null;
    }
    try
    { return new URL(URLString.tidyURL(urlString));
    }
    catch (MalformedURLException ex)
    { // callers are told about an unusable url through the null reply
      return null;
    }
  }

  /**
   * Replace the held url with its tidied form; a <code>null</code> url
   * stays <code>null</code>.
   */
  public void tidy()
  { this.url = tidyURL(this.url);
  }

  /**
   * @return <code>true</code> if no url is held
   */
  public boolean isNull()
  { return this.url == null;
  }

  /**
   * Test whether a url lies outside a home area, ie. its text does not
   * begin with <code>home</code>.
   *
   * @param home the url prefix of the home area
   * @param url  the url to test
   * @return <code>true</code> if <code>url</code> does not start with
   *         <code>home</code>
   */
  public static boolean isForeign(String home, URL url)
  { return !url.toString().startsWith(home);
  }

  /**
   * Remove any trailing "#localanchor" local reference from a URL string
   *
   * @param target the original url
   * @return the cleaned url, sans local references; <code>null</code> if
   *         the url is <code>null</code> or consists only of a local
   *         reference
   */
  public static String noRef(String target)
  { if (target == null)
    { return null;
    }

    int hash = target.indexOf('#');
    if (hash < 0)
    { return target;
    }
    if (hash == 0)
    { return null;
    }
    return target.substring(0, hash);
  }

  /**
   * Obtain the trailing local anchor reference from a URL,
   * if it exists.
   * e.g. for http://www.test.mdx.ac.uk/example/index.html#credits the
   * response would be "credits"
   *
   * @param target the original url.
   * @return the local reference. <code>null</code> is returned if the url
   *         is <code>null</code> or no local reference exists.
   */
  public static String subRef(String target)
  { if (target == null)
    { return null;
    }

    int hash = target.indexOf('#');
    if (hash < 0)
    { return null;
    }
    return target.substring(hash + 1);
  }

  /**
   * Clean a URL of the form GET parameters.
   *
   * @param url the original url, with GET message parameters
   * @return the url up to, but excluding, the first '?';
   *         <code>null</code> if the url is <code>null</code>
   */
  public static String noParameters(String url)
  { if (url == null)
    { return null;
    }

    int query = url.indexOf('?');
    if (query < 0)
    { return url;
    }
    return url.substring(0, query);
  }

  /**
   * Obtain the form parameters from a url
   *
   * @param url the original url, with GET message parameters
   * @return the form "GET" parameters. The leading '?' is preserved in the
   *         reply. If no parameters were given, or the url is
   *         <code>null</code>, the result is <code>null</code>.
   */
  public static String formParameters(String url)
  { if (url == null)
    { return null;
    }

    int query = url.indexOf('?');
    if (query < 0)
    { return null;
    }
    return url.substring(query);
  }

  /**
   * Flatten a URL, making the '?' and '&amp;' items into underscores. When
   * anything was replaced and the url contains ".html", the first ".html"
   * is moved to the very end, so "a.html?x=1" becomes "a_x=1.html".
   *
   * @param oldUrl the original url, with GET message parameters
   * @return the "flattened" url; <code>null</code> if the url is
   *         <code>null</code>
   */
  public static String flattenedUrl(String oldUrl)
  { if (oldUrl == null)
    { return null;
    }

    // replace ? with _
    // replace & with _
    String flat = oldUrl.replace('?', '_').replace('&', '_');
    if (flat.equals(oldUrl))
    { // nothing to flatten
      return flat;
    }

    // the replacements keep every character in place, so an index found in
    // the original text is valid in the flattened text as well
    int html = oldUrl.indexOf(".html");
    if (html >= 0)
    { flat = flat.substring(0, html) + flat.substring(html + 5) + ".html";
    }
    return flat;
  }

  /**
   * Provide the complete pathname in the given url
   * eg. "example/index.html" for http://www.test.mdx.ac.uk/example/index.html
   *
   * @param url url of page
   * @return everything after the '/' that follows the host;
   *         <code>null</code> if the url is <code>null</code>, has no "//",
   *         or has no '/' after the host
   */
  public static String pathName(String url)
  { if (url == null)
    { return null;
    }

    int authority = url.indexOf("//");
    if (authority < 0)
    { return null;
    }

    int slash = url.indexOf('/', authority + 2);
    if (slash < 0)
    { return null;
    }
    return url.substring(slash + 1);
  }

  /**
   * Provide the leaf filename in the given url
   * eg. "index.html" for http://www.test.mdx.ac.uk/example/index.html
   *
   * @param url url of page
   * @return everything after the last '/'; <code>null</code> if the url is
   *         <code>null</code>, has no '/', or ends in '/'
   */
  public static String leafName(String url)
  { if (url == null)
    { return null;
    }

    int slash = url.lastIndexOf('/');
    if (slash < 0 || slash == url.length() - 1)
    { return null;
    }
    // an earlier version also compared the last '/' with itself here; that
    // check could never fire and has been dropped without changing results
    return url.substring(slash + 1);
  }

  /**
   * Provide the scheme and host part of the given url
   * eg. "http://www.test.mdx.ac.uk/" for
   * http://www.test.mdx.ac.uk/example/index.html
   *
   * @param url url of page
   * @return the url up to and including the '/' that follows the host, or
   *         the whole url if nothing follows the host; <code>null</code> if
   *         the url is <code>null</code> or has no "//"
   */
  public static String host(String url)
  { if (url == null)
    { return null;
    }

    int authority = url.indexOf("//");
    if (authority < 0)
    { return null;
    }

    int slash = url.indexOf('/', authority + 2);
    if (slash < 0)
    { return url;
    }
    return url.substring(0, slash + 1);
  }

  /**
   * Return the logical path of the url on the remote server
   * eg. "example" for http://www.test.mdx.ac.uk/example/index.html
   *
   * @param url url of page
   * @return the directories between the host and the leaf, without leading
   *         or trailing '/'; <code>null</code> if the url is
   *         <code>null</code>, has no "//", or has no directory after the
   *         host
   */
  public static String path(String url)
  { String afterHost = pathName(url);
    if (afterHost == null || afterHost.length() == 0)
    { return null;
    }

    int lastSlash = afterHost.lastIndexOf('/');
    if (lastSlash < 0)
    { return null;
    }
    return afterHost.substring(0, lastSlash);
  }

  /**
   * Give the 'dot' extension of a url, including the dot;
   * eg. ".html" for http://www.test.mdx.ac.uk/example/index.html
   * Any GET parameters are ignored.
   *
   * @param url the url
   * @return the extension, or <code>null</code> if the url is
   *         <code>null</code>, has no leaf name, or the leaf has no '.'
   */
  public static String extension(String url)
  { if (url == null)
    { return null;
    }

    // drop the parameters first: a '.' or '/' inside them says nothing
    // about the type of the page itself
    String bare = noParameters(url);

    int slash = bare.lastIndexOf('/');
    if (slash < 0 || slash >= bare.length() - 1)
    { return null;
    }

    String leaf = bare.substring(slash + 1);
    int dot = leaf.lastIndexOf('.');
    if (dot < 0)
    { return null;
    }
    return leaf.substring(dot);
  }

  /**
   * Give the 'dot' extension of a url; see {@link #extension(String)}.
   *
   * @param url the url
   * @return the extension, or <code>null</code> if there is none
   */
  public static String extension(URL url)
  { return extension(url.toString());
  }

  /**
   * @return the held url text, which may be <code>null</code>
   */
  public String toString()
  { return this.url;
  }
}
Note: See TracBrowser for help on using the repository browser.