Changeset 22840
- Timestamp:
- 2010-09-02T14:18:47+12:00 (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/GreenstoneMETSPlugin.pm
r21803 r22840 32 32 package GreenstoneMETSPlugin; 33 33 34 use Encode; 34 35 use ghtml; 35 36 … … 45 46 } 46 47 47 my $arguments = [ 48 ]; 48 my $arguments = [ { 'name' => "process_exp", 49 'desc' => "{BasePlugin.process_exp}", 50 'type' => "regexp", 51 'reqd' => "no", 52 'deft' => &get_default_process_exp() 53 } 54 ]; 55 49 56 my $options = { 'name' => "GreenstoneMETSPlugin", 50 57 'desc' => "{GreenstoneMETSPlugin.desc}", 51 58 'abstract' => "no", 52 'inherits' => "yes" }; 59 'inherits' => "yes", 60 'args' => $arguments }; 61 53 62 54 63 … … 100 109 $filename =~ s/docmets.xml$/doctxt.xml/; 101 110 102 if (!open (FILEIN, "<$filename")){111 if (!open (FILEIN, "<:utf8", $filename)) { 103 112 print STDERR "Warning: unable to open the $filename\n"; 104 113 $self->{'xmltxt'} = ""; … … 111 120 } 112 121 } 122 113 123 my $xml_parser = XML::XPath->new (xml=> $xml_text); 114 124 #my $xml_tree = $xml_parser->parse ($xml_text); … … 222 232 223 233 foreach my $md_pair (@$md_list){ 224 my $metadata_name = $md_pair->{'metadata_name'}; 225 my $metadata_value = $md_pair->{'metadata_value'}; 226 $self->{'doc_obj'}->add_utf8_metadata($self->{'section'}, $metadata_name, $metadata_value); 234 # text read in by XML::Parser is in Perl's binary byte value 235 # form ... need to explicitly make it UTF-8 236 237 my $metadata_name = decode("utf8",$md_pair->{'metadata_name'}); 238 my $metadata_value = decode("utf8",$md_pair->{'metadata_value'}); 239 240 $self->{'doc_obj'}->add_utf8_metadata($self->{'section'}, 241 $metadata_name, $metadata_value); 227 242 } 228 243 … … 231 246 232 247 foreach my $section_content (@$content_list){ 248 # Don't need to decode $content as this has been read in 249 # through XPath which (unlike XML::Parser) correctly sets 250 # the string to be UTF8 rather than a 'binary' string of bytes 233 251 my $content = $section_content->{'section_content'}; 252 234 253 $self->{'doc_obj'}->add_utf8_text($self->{'section'},$content); 235 254 }
Note: See TracChangeset for help on using the changeset viewer.