Changeset 16578 for gsdl/trunk/perllib/doc.pm
- Timestamp: 2008-07-29T13:07:59+12:00 (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
gsdl/trunk/perllib/doc.pm
r15894 r16578
    39     39   use ghtml;
    40     40   use File::stat;
           41 + use MIME::Base64;
    41     42   ##use hashdoc;
    42     43   use docprint;
     …      …
   145    146   my ($source_filename) = @_;
   146    147
   147        - $self->set_metadata_element ($self->get_top_section(),
          148 + # Since the gsdlsourcefilename element goes into the doc.xml it has
          149 + # to be utf8. However, it should also *represent* the source filename
          150 + # (in the import directory) which may not be utf8 at all.
          151 + # For instance, if this meta element (gsdlsourcefilename) will be
          152 + # used by other applications that parse doc.xml in order to locate
          153 + # gsdlsourcefilename. Therefore, the solution is to encode the real
          154 + # filename into Base64 encoding which is a binary-to-text encoding,
          155 + # meaning that the resulting string is ASCII (utf8). See also
          156 + # MIME#Content-Transfer-Encoding in Wiki.
          157 +
          158 + print STDERR "******Base64 encoding the non-utf8 gsdl_source_filename $source_filename ";
          159 +
          160 + # remove "import/" from path, base 64 encode just the source_filename
          161 + # (remove trailing spaces and newlines) and prefix "import/" again.
          162 + my $import_dir = "import";
          163 + my $srcfilename = $source_filename;
          164 + $srcfilename =~ s/^$import_dir(?:\\|\/)+(.*)$/$1/;
          165 +
          166 + print STDERR "-> $srcfilename -> ";
          167 + $srcfilename = &MIME::Base64::encode_base64($srcfilename);
          168 + $srcfilename =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
          169 +
          170 + $source_filename = &util::filename_cat($import_dir, $srcfilename);
          171 + print STDERR "$source_filename\n";
          172 +
          173 + $self->set_utf8_metadata_element ($self->get_top_section(),
   148    174   "gsdlsourcefilename",
   149    175   $source_filename);
     …      …
   154    180   my ($converted_filename) = @_;
   155    181
   156        - $self->set_metadata_element ($self->get_top_section(),
          182 + # we know the converted filename is utf8
          183 + $self->set_utf8_metadata_element ($self->get_top_section(),
   157    184   "gsdlconvertedfilename",
   158    185   $converted_filename);
     …      …
   826    853   #print STDERR "###$field=$value\n";
   827    854   # double check that the value is utf-8
   828        - if (unicode::ensure_utf8(\$value)) {
   829        - print STDERR "doc::add_utf8_metadata: warning: '$field' wasn't utf8\n";
          855 + if (!&unicode::check_is_utf8($value)) {
          856 + print STDERR "doc::add_utf8_metadata - warning: '$field''s value $value wasn't utf8. ";
          857 + &unicode::ensure_utf8(\$value);
          858 + print STDERR "Tried converting to utf8: $value\n";
   830    859   }
   831    860
Note: See TracChangeset for help on using the changeset viewer.