Changeset 23362
- Timestamp:
- 2010-12-01T11:40:36+13:00 (13 years ago)
- Location:
- main/trunk/greenstone2/perllib
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/doc.pm
r23278 r23362 64 64 # the docsave processor now calls set_lastmodified 65 65 66 $self-> {'source_path'} = $source_filename;66 $self->set_source_path($source_filename); 67 67 68 68 if (defined $source_filename) { … … 76 76 return $self; 77 77 } 78 79 80 sub set_source_path 81 { 82 my $self = shift @_; 83 my ($source_filename) = @_; 84 85 if (defined $source_filename) { 86 # On Windows the source_filename can be in terse DOS format 87 # e.g. test~1.txt 88 89 $self->{'terse_source_path'} = $source_filename; 90 91 if (-e $source_filename) { 92 # See if we can do better for Windows with a filename 93 if ($ENV{'GSDLOS'} =~ /^windows$/i) { 94 require Win32; 95 $self->{'source_path'} = Win32::GetLongPathName($source_filename); 96 } 97 else { 98 # For Unix-based systems, there is no difference between the two 99 $self->{'source_path'} = $source_filename; 100 } 101 } 102 else { 103 print STDERR "Warning: In doc::set_source_path(), file\n"; 104 print STDERR " $source_filename\n"; 105 print STDERR " does not exist\n"; 106 107 # (default) Set it to whatever we were given 108 $self->{'source_path'} = $source_filename; 109 } 110 } 111 else { 112 # Previous code for setting source_path allowed for 113 # it to be undefined, so continue this practice 114 $self->{'terse_source_path'} = undef; 115 $self->{'source_path'} = undef; 116 } 117 } 118 119 120 sub get_source_path 121 { 122 my $self = shift @_; 123 124 return $self->{'terse_source_path'}; 125 } 126 78 127 # set lastmodified for OAI purposes, added by GRB, moved by kjdon 79 128 sub set_lastmodified { 80 129 my $self = shift (@_); 81 130 82 my $source_path = $self->{' source_path'};131 my $source_path = $self->{'terse_source_path'}; 83 132 84 133 if (defined $source_path && (-e $source_path)) { … … 207 256 my $self = shift (@_); 208 257 209 return $self->{' source_path'};258 return $self->{'terse_source_path'}; 210 259 } 211 260 -
main/trunk/greenstone2/perllib/ghtml.pm
r22952 r23362 207 207 208 208 # a numeric entity 209 if ($entity =~ /^\#0*(\d+)/) { 210 my $code=$1; 209 my $code = undef; 210 if ($entity =~ m/^\#0*(\d+)$/) { 211 $code=$1; 212 } 213 elsif ($entity =~ m/^\#x([0-9A-F]+)$/i) { 214 $code=hex($1); 215 } 216 217 if (defined $code) { 211 218 # non-standard Microsoft breakage, as usual 212 219 if ($code < 0x9f) { # code page 1252 uses reserved bytes -
main/trunk/greenstone2/perllib/unicode.pm
r23304 r23362 619 619 620 620 if (!&is_url_encoded($text)) { 621 $text =~ s/([^ A-Z0-9\ \.\-\_])/sprintf("%%%02X", ord($1))/iseg;621 $text =~ s/([^0-9A-Z\ \.\-\_])/sprintf("%%%02X", ord($1))/iseg; 622 622 # return the url-encoded character entity for underscore back to the entity 623 623 $text =~ s/%26%23095%3B/&\#095;/g; … … 629 629 my ($text) = @_; 630 630 631 $text =~ s/\%([A-F0-9]{2})/pack('C', hex($1))/ige; 631 $text =~ s/\%([0-9A-F]{2})/pack('C', hex($1))/ige; 632 $text =~ s/\&\#x([0-9A-F]+);/pack('C', hex($1))/ige; 633 $text =~ s/\&\#([0-9]+);/pack('C', $1)/ige; 634 632 635 return $text; 633 636 } … … 635 638 sub is_url_encoded { 636 639 my ($text) = @_; 637 return ($text =~ m/\%([ A-F0-9]{2})/);640 return ($text =~ m/\%([0-9A-F]{2})/i) || ($text =~ m/\&\#x([0-9A-F]+;)/i) || ($text =~ m/\&\#([0-9]+;)/i); 638 641 } 639 642 … … 756 759 757 760 my @url_encoded_chars 758 = map { $_ > 128 ? # if wide character... 759 sprintf("%%%2X", $_) : # \x{...} 760 chr($_) 761 } unpack("U*", $str_in); # unpack Unicode characters 761 = map { $_ > 255 ? # Needs to be represent in entity form 762 sprintf("&#x%X;",$_) : 763 $_ > 128 ? # Representable in %XX form 764 sprintf("%%%2X", $_) : 765 chr($_) # otherwise, Ascii char 766 } unpack("U*", $str_in); # Unpack Unicode characters 762 767 763 768 … … 775 780 my $str_out = $str_in; 776 781 777 $str_out =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; 782 $str_out =~ s/&#x([0-9A-F]+);/chr(hex($1))/eig; 783 $str_out =~ s/&#([0-9]+);/chr($1)/eig; 784 $str_out =~ s/%([0-9A-F]{2})/chr(hex($1))/eig; 778 785 779 786 return $str_out; -
main/trunk/greenstone2/perllib/util.pm
r23314 r23362 26 26 package util; 27 27 28 use strict; 29 30 use Encode; 28 31 use File::Copy; 29 32 use File::Basename; 30 31 use strict;32 33 33 34 34 # removes files (but not directories) … … 513 513 } 514 514 515 # Primarily for filenames generated by processing 516 # content of HTML files (which are mapped to UTF-8 internally) 517 # 518 # To turn this into an octet string that really exists on the file 519 # system: 520 # 1. don't need to do anything special for Unix-based systems 521 # (as underlying file system is byte-code) 522 # 2. need to map to short DOS filenames for Windows 523 524 sub utf8_to_real_filename 525 { 526 my ($utf8_filename) = @_; 527 528 my $real_filename; 529 530 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 531 require Win32; 532 my $unicode_filename = decode("utf8",$utf8_filename); 533 $real_filename = Win32::GetShortPathName($unicode_filename); 534 } 535 else { 536 $real_filename = $utf8_filename; 537 } 538 539 return $real_filename; 540 } 541 542 543 sub fd_exists 544 { 545 my $filename_full_path = shift @_; 546 my $test_op = shift @_ || "-e"; 547 548 # By default tests for existance of file or directory (-e) 549 # Can be made more specific by providing second parameter (e.g. -f or -d) 550 551 my $exists = 0; 552 553 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 554 require Win32; 555 my $filename_short_path = Win32::GetShortPathName($filename_full_path); 556 if (!defined $filename_short_path) { 557 # Was probably still in UTF8 form (not what is needed on Windows) 558 my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)"; 559 if (defined $unicode_filename_full_path) { 560 $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path); 561 } 562 } 563 $filename_full_path = $filename_short_path; 564 } 565 566 if (defined $filename_full_path) { 567 $exists = eval "($test_op \$filename_full_path)"; 568 } 569 570 return $exists; 571 } 572 573 sub file_exists 574 { 575 my ($filename_full_path) = @_; 576 577 return fd_exists($filename_full_path,"-f"); 578 } 579 580 sub dir_exists 581 { 582 my ($filename_full_path) = @_; 583 584 return fd_exists($filename_full_path,"-d"); 585 } 515 586 516 587 … … 934 1005 935 1006 1007 936 1008 # returns 1 if filename1 and filename2 point to the same 937 1009 # file or directory … … 953 1025 } 954 1026 1027 1028 sub filename_within_directory 1029 { 1030 my ($filename,$within_dir) = @_; 1031 1032 my $dirsep = &util::get_dirsep(); 1033 if ($within_dir !~ m/$dirsep$/) { 1034 $within_dir .= $dirsep; 1035 } 1036 1037 $within_dir =~ s/\\/\\\\/g; # escape DOS style file separator 1038 1039 if ($filename =~ m/^$within_dir(.*)$/) { 1040 $filename = $1; 1041 } 1042 1043 return $filename; 1044 } 1045 955 1046 sub filename_within_collection 956 1047 { … … 960 1051 961 1052 if (defined $collect_dir) { 962 my $dirsep = &util::get_dirsep(); 963 if ($collect_dir !~ m/$dirsep$/) { 964 $collect_dir .= $dirsep; 965 } 966 967 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator 968 1053 969 1054 # if from within GSDLCOLLECTDIR, then remove directory prefix 970 1055 # so source_filename is realative to it. This is done to aid … … 976 1061 # GSDLCOLLECTDIR subsequently needs to be put back on to turn 977 1062 # it back into a full pathname. 978 979 if ($filename =~ /^$collect_dir(.*)$/) { 980 $filename = $1; 981 } 1063 1064 $filename = filename_within_directory($filename,$collect_dir); 982 1065 } 983 1066 984 1067 return $filename; 985 1068 } 1069 1070 sub prettyprint_file 1071 { 1072 my ($base_dir,$file) = @_; 1073 1074 my $filename_full_path = &util::filename_cat($base_dir,$file); 1075 1076 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 1077 require Win32; 1078 1079 # For some reason base_dir in the form c:/a/b/c 1080 # This leads to confusion later on, so turn it back into 1081 # the more usual Windows form 1082 $base_dir =~ s/\//\\/g; 1083 my $long_base_dir = Win32::GetLongPathName($base_dir); 1084 my $long_full_path = Win32::GetLongPathName($filename_full_path); 1085 1086 $file = filename_within_directory($long_full_path,$long_base_dir); 1087 } 1088 1089 return $file; 1090 } 1091 1092 1093 sub upgrade_if_dos_filename 1094 { 1095 my ($filename_full_path) = @_; 1096 1097 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 1098 require Win32; 1099 # Ensure any DOS-like filename, such as test~1.txt, has been upgraded 1100 # to its long (Windows) version 1101 $filename_full_path = Win32::GetLongPathName($filename_full_path); 1102 # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone) 1103 $filename_full_path =~ s/^(.)/\l$1/; 1104 } 1105 1106 return $filename_full_path; 1107 } 1108 986 1109 987 1110 sub filename_is_absolute
Note:
See TracChangeset
for help on using the changeset viewer.