Changeset 13172 for trunk/gsdl/perllib/doc.pm
- Timestamp:
- 2006-10-27T13:41:01+13:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/doc.pm
r13050 r13172 39 39 use File::stat; 40 40 ##use hashdoc; 41 use docprint; 41 42 42 43 # the document type may be indexed_doc, nonindexed_doc, or … … 221 222 return $doc_type if (defined $doc_type); 222 223 return "indexed_doc"; 223 }224 225 sub _escape_text {226 my $self = shift (@_);227 my ($text) = @_;228 # special characters in the gml encoding229 $text =~ s/&/&/g; # this has to be first...230 $text =~ s/</</g;231 $text =~ s/>/>/g;232 $text =~ s/\"/"/g;233 234 return $text;235 }236 237 sub buffer_section_xml {238 my $self = shift (@_);239 my ($section) = @_;240 241 my $section_ptr = $self->_lookup_section ($section);242 return "" unless defined $section_ptr;243 244 my $all_text = "<Section>\n";245 $all_text .= " <Description>\n";246 247 # output metadata248 foreach my $data (@{$section_ptr->{'metadata'}}) {249 my $escaped_value = $self->_escape_text($data->[1]);250 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";251 }252 253 $all_text .= " </Description>\n";254 255 # output the text256 $all_text .= " <Content>";257 $all_text .= $self->_escape_text($section_ptr->{'text'});258 $all_text .= "</Content>\n";259 260 # output all the subsections261 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {262 $all_text .= $self->buffer_section_xml("$section.$subsection");263 }264 265 $all_text .= "</Section>\n";266 267 # make sure no nasty control characters have snuck through268 # (XML::Parser will barf on anything it doesn't consider to be269 # valid UTF-8 text, including things like \c@, \cC etc.)270 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;271 272 return $all_text;273 }274 275 276 277 278 sub output_section {279 my $self = shift (@_);280 my ($handle, $section) = @_;281 282 print $handle $self->buffer_section_xml($section);283 }284 285 my $dc_set = { Title => 1,286 Creator => 1,287 Subject => 1,288 Description => 1,289 Publisher => 1,290 Contributor => 1,291 Date => 1,292 Type => 1,293 Format => 1,294 Identifier => 1,295 Source => 1,296 Language => 1,297 Relation => 1,298 Coverage => 1,299 Rights => 1};300 301 302 303 304 # Build up dublin_core metadata. Priority given to dc.* over ex.*305 # This method was apparently added by Jeffrey and committed by Shaoqun.306 # But we don't know why it was added, so not using it anymore.307 sub new_buffer_dc_section {308 309 my $self = shift(@_);310 my ($section, $version) = @_;311 312 # build up string of dublin core metadata313 $section="" unless defined $section;314 315 my $section_ptr=$self->_lookup_section($section);316 return "" unless defined $section_ptr;317 foreach my $data (@{$section_ptr->{'metadata'}}){318 my $escaped_value = $self->_escape_text($data->[1]);319 my $dc_element = $data->[0];320 321 my @array = split('\.',$dc_element);322 my ($type,$name);323 324 if(defined $array[1])325 {326 $type = $array[0];327 $name = $array[1];328 }329 else330 {331 $type = "ex";332 $name = $array[0];333 }334 335 $all_text .= ' <Metadata Type="'. $type.'" Name="'.$name.'">'. $escaped_value. "</Metadata>\n";336 }337 return $all_text;338 }339 340 341 sub buffer_dc_section {342 my $self = shift(@_);343 my ($section, $version) = @_;344 345 # build up string of dublin core metadata346 $section="" unless defined $section;347 348 my $section_ptr=$self->_lookup_section($section);349 return "" unless defined $section_ptr;350 351 352 my $explicit_dc = {};353 my $explicit_ex = {};354 355 my $all_text="";356 foreach my $data (@{$section_ptr->{'metadata'}}){357 foreach my $temp (@$data)358 {359 print "($temp) ";360 }361 print "\n";362 my $escaped_value = $self->_escape_text($data->[1]);363 if ($data->[0]=~ m/^dc\./) {364 $data->[0] =~ tr/[A-Z]/[a-z]/;365 366 $data->[0] =~ m/^dc\.(.*)/;367 my $dc_element = $1;368 369 if (!defined $explicit_dc->{$dc_element}) {370 $explicit_dc->{$dc_element} = [];371 }372 push(@{$explicit_dc->{$dc_element}},$escaped_value);373 374 #$all_text .= ' <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";375 if (defined $version && ($version eq "oai_dc")) {376 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";377 }378 else {379 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";380 }381 382 }383 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {384 $data->[0] =~ m/^(ex\.)?(.*)/;385 my $ex_element = $2;386 my $lc_ex_element = lc($ex_element);387 388 if (defined $dc_set->{$ex_element}) {389 if (!defined $explicit_ex->{$lc_ex_element}) {390 $explicit_ex->{$lc_ex_element} = [];391 }392 push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);393 }394 }395 }396 397 # go through dc_set and for any element *not* defined in explicit_dc398 # that do exist in explicit_ex, add it in as metadata399 foreach my $k ( keys %$dc_set ) {400 my $lc_k = lc($k);401 402 if (!defined $explicit_dc->{$lc_k}) {403 if (defined $explicit_ex->{$lc_k}) {404 405 foreach my $v (@{$explicit_ex->{$lc_k}}) {406 my $dc_element = $lc_k;407 my $escaped_value = $v;408 409 if (defined $version && ($version eq "oai_dc")) {410 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";411 }412 else {413 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";414 }415 416 }417 }418 }419 }420 421 if ($all_text eq "") {422 $all_text .= " There is no Dublin Core metatdata in this document\n";423 }424 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;425 426 return $all_text;427 }428 429 430 # Print out dublin_core metadata431 # changed back to using old method, don't know why Jeffrey changed this432 sub output_dc_section {433 my $self = shift(@_);434 my ($handle, $section, $version) = @_;435 436 my $all_text = $self->buffer_dc_section($section,$version);437 #my $all_text = $self->new_buffer_dc_section($section,$version);438 439 print $handle $all_text;440 224 } 441 225 … … 555 339 print STDERR "doc::set_OID could not write to $filename\n"; 556 340 } else { 557 $self->output_section('OUTFILE', $self->get_top_section(),558 undef, 1);341 my $doc_text = &docprint::get_section_xml($self, $self->get_top_section()); 342 print OUTFILE $doc_text; 559 343 close (OUTFILE); 560 344 } … … 576 360 if (!defined $OID) 577 361 { 578 my $hash_text = $self->buffer_section_gml($self->get_top_section(), 579 undef, 1); 362 my $hash_text = &docprint::get_section_xml($self, $self->get_top_section()); 580 363 my $hash_len = length($hash_text); 581 364 … … 976 759 } 977 760 978 # returns a xml element of the form <MetadataList><Metadata name="metadata-name">metadata_value</Metadata>...</MetadataList>979 sub get_top_metadata_list{980 my $self = shift (@_);981 982 my @topmetadata =$self->get_all_metadata($self->get_top_section());983 my $metadatalist ='<MetadataList>';984 985 foreach my $i (@topmetadata){986 foreach my $j (@$i){987 my %metaMap = @$j;988 foreach my $key (keys %metaMap){989 $metadatalist .='<Metadata name='."\"$key\"".'>'.$self->_escape_text($metaMap{$key}).'</Metadata>'."\n";990 }991 }992 }993 994 $metadatalist .='</MetadataList>';995 return $metadatalist;996 }997 998 999 761 # $value is optional 1000 762 sub delete_metadata {
Note:
See TracChangeset
for help on using the changeset viewer.