Changeset 13172
- Timestamp:
- 2006-10-27T13:41:01+13:00 (17 years ago)
- Location:
- trunk/gsdl/perllib
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/doc.pm
r13050 r13172 39 39 use File::stat; 40 40 ##use hashdoc; 41 use docprint; 41 42 42 43 # the document type may be indexed_doc, nonindexed_doc, or … … 221 222 return $doc_type if (defined $doc_type); 222 223 return "indexed_doc"; 223 }224 225 sub _escape_text {226 my $self = shift (@_);227 my ($text) = @_;228 # special characters in the gml encoding229 $text =~ s/&/&/g; # this has to be first...230 $text =~ s/</</g;231 $text =~ s/>/>/g;232 $text =~ s/\"/"/g;233 234 return $text;235 }236 237 sub buffer_section_xml {238 my $self = shift (@_);239 my ($section) = @_;240 241 my $section_ptr = $self->_lookup_section ($section);242 return "" unless defined $section_ptr;243 244 my $all_text = "<Section>\n";245 $all_text .= " <Description>\n";246 247 # output metadata248 foreach my $data (@{$section_ptr->{'metadata'}}) {249 my $escaped_value = $self->_escape_text($data->[1]);250 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";251 }252 253 $all_text .= " </Description>\n";254 255 # output the text256 $all_text .= " <Content>";257 $all_text .= $self->_escape_text($section_ptr->{'text'});258 $all_text .= "</Content>\n";259 260 # output all the subsections261 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {262 $all_text .= $self->buffer_section_xml("$section.$subsection");263 }264 265 $all_text .= "</Section>\n";266 267 # make sure no nasty control characters have snuck through268 # (XML::Parser will barf on anything it doesn't consider to be269 # valid UTF-8 text, including things like \c@, \cC etc.)270 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;271 272 return $all_text;273 }274 275 276 277 278 sub output_section {279 my $self = shift (@_);280 my ($handle, $section) = @_;281 282 print $handle $self->buffer_section_xml($section);283 }284 285 my $dc_set = { Title => 1,286 Creator => 1,287 Subject => 1,288 Description => 1,289 Publisher => 1,290 Contributor => 1,291 Date => 1,292 Type => 1,293 Format => 1,294 Identifier => 1,295 Source => 1,296 Language => 1,297 Relation => 1,298 Coverage => 1,299 Rights => 1};300 301 302 303 304 # Build up dublin_core metadata. Priority given to dc.* over ex.*305 # This method was apparently added by Jeffrey and committed by Shaoqun.306 # But we don't know why it was added, so not using it anymore.307 sub new_buffer_dc_section {308 309 my $self = shift(@_);310 my ($section, $version) = @_;311 312 # build up string of dublin core metadata313 $section="" unless defined $section;314 315 my $section_ptr=$self->_lookup_section($section);316 return "" unless defined $section_ptr;317 foreach my $data (@{$section_ptr->{'metadata'}}){318 my $escaped_value = $self->_escape_text($data->[1]);319 my $dc_element = $data->[0];320 321 my @array = split('\.',$dc_element);322 my ($type,$name);323 324 if(defined $array[1])325 {326 $type = $array[0];327 $name = $array[1];328 }329 else330 {331 $type = "ex";332 $name = $array[0];333 }334 335 $all_text .= ' <Metadata Type="'. $type.'" Name="'.$name.'">'. $escaped_value. "</Metadata>\n";336 }337 return $all_text;338 }339 340 341 sub buffer_dc_section {342 my $self = shift(@_);343 my ($section, $version) = @_;344 345 # build up string of dublin core metadata346 $section="" unless defined $section;347 348 my $section_ptr=$self->_lookup_section($section);349 return "" unless defined $section_ptr;350 351 352 my $explicit_dc = {};353 my $explicit_ex = {};354 355 my $all_text="";356 foreach my $data (@{$section_ptr->{'metadata'}}){357 foreach my $temp (@$data)358 {359 print "($temp) ";360 }361 print "\n";362 my $escaped_value = $self->_escape_text($data->[1]);363 if ($data->[0]=~ m/^dc\./) {364 $data->[0] =~ tr/[A-Z]/[a-z]/;365 366 $data->[0] =~ m/^dc\.(.*)/;367 my $dc_element = $1;368 369 if (!defined $explicit_dc->{$dc_element}) {370 $explicit_dc->{$dc_element} = [];371 }372 push(@{$explicit_dc->{$dc_element}},$escaped_value);373 374 #$all_text .= ' <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";375 if (defined $version && ($version eq "oai_dc")) {376 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";377 }378 else {379 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";380 }381 382 }383 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {384 $data->[0] =~ m/^(ex\.)?(.*)/;385 my $ex_element = $2;386 my $lc_ex_element = lc($ex_element);387 388 if (defined $dc_set->{$ex_element}) {389 if (!defined $explicit_ex->{$lc_ex_element}) {390 $explicit_ex->{$lc_ex_element} = [];391 }392 push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);393 }394 }395 }396 397 # go through dc_set and for any element *not* defined in explicit_dc398 # that do exist in explicit_ex, add it in as metadata399 foreach my $k ( keys %$dc_set ) {400 my $lc_k = lc($k);401 402 if (!defined $explicit_dc->{$lc_k}) {403 if (defined $explicit_ex->{$lc_k}) {404 405 foreach my $v (@{$explicit_ex->{$lc_k}}) {406 my $dc_element = $lc_k;407 my $escaped_value = $v;408 409 if (defined $version && ($version eq "oai_dc")) {410 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";411 }412 else {413 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";414 }415 416 }417 }418 }419 }420 421 if ($all_text eq "") {422 $all_text .= " There is no Dublin Core metatdata in this document\n";423 }424 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;425 426 return $all_text;427 }428 429 430 # Print out dublin_core metadata431 # changed back to using old method, don't know why Jeffrey changed this432 sub output_dc_section {433 my $self = shift(@_);434 my ($handle, $section, $version) = @_;435 436 my $all_text = $self->buffer_dc_section($section,$version);437 #my $all_text = $self->new_buffer_dc_section($section,$version);438 439 print $handle $all_text;440 224 } 441 225 … … 555 339 print STDERR "doc::set_OID could not write to $filename\n"; 556 340 } else { 557 $self->output_section('OUTFILE', $self->get_top_section(),558 undef, 1);341 my $doc_text = &docprint::get_section_xml($self, $self->get_top_section()); 342 print OUTFILE $doc_text; 559 343 close (OUTFILE); 560 344 } … … 576 360 if (!defined $OID) 577 361 { 578 my $hash_text = $self->buffer_section_gml($self->get_top_section(), 579 undef, 1); 362 my $hash_text = &docprint::get_section_xml($self, $self->get_top_section()); 580 363 my $hash_len = length($hash_text); 581 364 … … 976 759 } 977 760 978 # returns a xml element of the form <MetadataList><Metadata name="metadata-name">metadata_value</Metadata>...</MetadataList>979 sub get_top_metadata_list{980 my $self = shift (@_);981 982 my @topmetadata =$self->get_all_metadata($self->get_top_section());983 my $metadatalist ='<MetadataList>';984 985 foreach my $i (@topmetadata){986 foreach my $j (@$i){987 my %metaMap = @$j;988 foreach my $key (keys %metaMap){989 $metadatalist .='<Metadata name='."\"$key\"".'>'.$self->_escape_text($metaMap{$key}).'</Metadata>'."\n";990 }991 }992 }993 994 $metadatalist .='</MetadataList>';995 return $metadatalist;996 }997 998 999 761 # $value is optional 1000 762 sub delete_metadata { -
trunk/gsdl/perllib/plugouts/BasPlugout.pm
r13064 r13172 41 41 42 42 my $arguments = [ 43 { 'name' => "group_size",43 { 'name' => "group_size", 44 44 'desc' => "{BasPlugout.group_size}", 45 45 'type' => "int", … … 73 73 'type' => "flag", 74 74 'reqd' => "no", 75 'hiddengli' => "no"} 75 'hiddengli' => "no"}, 76 { 'name' => "debug", 77 'desc' => "{BasPlugout.debug}", 78 'type' => "flag", 79 'reqd' => "no", 80 'hiddengli' => "yes"} 76 81 ]; 77 82 … … 483 488 # copy all the associated files, add this information as metadata 484 489 # to the document 485 print STDERR"Writing associated files to $doc_dir\n";490 print $outhandle "Writing associated files to $doc_dir\n"; 486 491 $self->process_assoc_files ($doc_obj, $doc_dir); 487 492 } 488 493 489 494 # save this document 490 $doc_obj->output_section('BasPlugout::GROUPPROCESS', $doc_obj->get_top_section()); 495 my $section_text = &docprint::get_section_xml($doc_obj,$doc_obj->get_top_section()); 496 print GROUPPROCESS $section_text; 491 497 492 498 $self->{'gs_count'}++; … … 717 723 } 718 724 725 719 726 #the subclass should implement this method if is_group method could return 1. 720 727 sub close_group_output{ … … 727 734 } 728 735 736 my $dc_set = { Title => 1, 737 Creator => 1, 738 Subject => 1, 739 Description => 1, 740 Publisher => 1, 741 Contributor => 1, 742 Date => 1, 743 Type => 1, 744 Format => 1, 745 Identifier => 1, 746 Source => 1, 747 Language => 1, 748 Relation => 1, 749 Coverage => 1, 750 Rights => 1}; 751 752 753 # returns an XML representation of the dublin core metadata 754 # if dc meta is not found, try ex mete 755 sub get_dc_metadata { 756 my $self = shift(@_); 757 my ($doc_obj, $section, $version) = @_; 758 759 # build up string of dublin core metadata 760 $section="" unless defined $section; 761 762 my $section_ptr = $doc_obj->_lookup_section($section); 763 return "" unless defined $section_ptr; 764 765 766 my $explicit_dc = {}; 767 my $explicit_ex = {}; 768 769 my $all_text=""; 770 foreach my $data (@{$section_ptr->{'metadata'}}){ 771 my $escaped_value = &docprint::escape_text($data->[1]); 772 if ($data->[0]=~ m/^dc\./) { 773 $data->[0] =~ tr/[A-Z]/[a-z]/; 774 775 $data->[0] =~ m/^dc\.(.*)/; 776 my $dc_element = $1; 777 778 if (!defined $explicit_dc->{$dc_element}) { 779 $explicit_dc->{$dc_element} = []; 780 } 781 push(@{$explicit_dc->{$dc_element}},$escaped_value); 782 783 if (defined $version && ($version eq "oai_dc")) { 784 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n"; 785 } 786 else { 787 # qualifier??? 788 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n"; 789 } 790 791 } 792 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) { 793 $data->[0] =~ m/^(ex\.)?(.*)/; 794 my $ex_element = $2; 795 my $lc_ex_element = lc($ex_element); 796 797 if (defined $dc_set->{$ex_element}) { 798 if (!defined $explicit_ex->{$lc_ex_element}) { 799 $explicit_ex->{$lc_ex_element} = []; 800 } 801 push(@{$explicit_ex->{$lc_ex_element}},$escaped_value); 802 } 803 } 804 } 805 806 # go through dc_set and for any element *not* defined in explicit_dc 807 # that do exist in explicit_ex, add it in as metadata 808 foreach my $k ( keys %$dc_set ) { 809 my $lc_k = lc($k); 810 811 if (!defined $explicit_dc->{$lc_k}) { 812 if (defined $explicit_ex->{$lc_k}) { 813 814 foreach my $v (@{$explicit_ex->{$lc_k}}) { 815 my $dc_element = $lc_k; 816 my $escaped_value = $v; 817 818 if (defined $version && ($version eq "oai_dc")) { 819 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n"; 820 } 821 else { 822 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n"; 823 } 824 825 } 826 } 827 } 828 } 829 830 if ($all_text eq "") { 831 $all_text .= " There is no Dublin Core metatdata in this document\n"; 832 } 833 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 834 835 return $all_text; 836 } 837 838 # Build up dublin_core metadata. Priority given to dc.* over ex.* 839 # This method was apparently added by Jeffrey and committed by Shaoqun. 840 # But we don't know why it was added, so not using it anymore. 841 sub new_get_dc_metadata { 842 843 my $self = shift(@_); 844 my ($doc_obj, $section, $version) = @_; 845 846 # build up string of dublin core metadata 847 $section="" unless defined $section; 848 849 my $section_ptr=$doc_obj->_lookup_section($section); 850 return "" unless defined $section_ptr; 851 852 my $all_text = ""; 853 foreach my $data (@{$section_ptr->{'metadata'}}){ 854 my $escaped_value = &docprint::escape_text($data->[1]); 855 my $dc_element = $data->[0]; 856 857 my @array = split('\.',$dc_element); 858 my ($type,$name); 859 860 if(defined $array[1]) 861 { 862 $type = $array[0]; 863 $name = $array[1]; 864 } 865 else 866 { 867 $type = "ex"; 868 $name = $array[0]; 869 } 870 871 $all_text .= ' <Metadata Type="'. $type.'" Name="'.$name.'">'. $escaped_value. "</Metadata>\n"; 872 } 873 return $all_text; 874 } 875 876 729 877 1; -
trunk/gsdl/perllib/plugouts/DSpacePlugout.pm
r12363 r13172 113 113 114 114 $self->output_xml_header($outhandler, "dublin_core",1); 115 $doc_obj->output_dc_section($outhandler,$doc_obj->get_top_section); 115 116 my $all_text = $self->get_dc_metadata($doc_obj, $doc_obj->get_top_section()); 117 print $outhandler $all_text; 118 116 119 $self->output_xml_footer($outhandler,"dublin_core"); 117 120 -
trunk/gsdl/perllib/plugouts/GAPlugout.pm
r12363 r13172 28 28 use strict; 29 29 no strict 'refs'; 30 no strict 'subs'; 30 31 31 32 eval {require bytes}; 32 33 use util; 33 34 use BasPlugout; 35 use docprint; 34 36 35 37 sub BEGIN { … … 61 63 my ($doc_obj,$doc_dir) = @_; 62 64 63 my $output_dir = $self->get_output_dir(); 64 &util::mk_all_dir ($output_dir) unless -e $output_dir; 65 66 my $working_dir = &util::filename_cat ($output_dir, $doc_dir); 67 &util::mk_all_dir ($working_dir) unless -e $working_dir; 68 69 $self->process_assoc_files ($doc_obj, $doc_dir, ''); 70 71 my $output_file = util::filename_cat ($working_dir, "doc.xml"); 72 73 $self->open_xslt_pipe($output_file, $self->{'xslt_file'}); 74 75 my $outhandler; 76 77 if (defined $self->{'xslt_writer'}){ 78 $outhandler = $self->{'xslt_writer'}; 65 my $outhandler; 66 if ($self->{'debug'}) { 67 $outhandler = STDOUT; 68 # can we do the xslt and still do debug mode? 69 } 70 else { 71 my $output_dir = $self->get_output_dir(); 72 &util::mk_all_dir ($output_dir) unless -e $output_dir; 73 74 my $working_dir = &util::filename_cat ($output_dir, $doc_dir); 75 &util::mk_all_dir ($working_dir) unless -e $working_dir; 76 77 $self->process_assoc_files ($doc_obj, $doc_dir, ''); 78 79 my $output_file = util::filename_cat ($working_dir, "doc.xml"); 80 81 $self->open_xslt_pipe($output_file, $self->{'xslt_file'}); 82 83 84 if (defined $self->{'xslt_writer'}){ 85 $outhandler = $self->{'xslt_writer'}; 86 } 87 else{ 88 $outhandler = $self->get_output_handler($output_file); 89 } 79 90 } 80 else{ 81 $outhandler = $self->get_output_handler($output_file); 82 } 83 91 84 92 $self->output_xml_header($outhandler,"Archive"); 85 $doc_obj->output_section($outhandler,$doc_obj->get_top_section()); 93 my $section_output = &docprint::get_section_xml($doc_obj, $doc_obj->get_top_section()); 94 print $outhandler $section_output; 86 95 $self->output_xml_footer($outhandler,"Archive"); 87 96 88 if (defined $self->{'xslt_writer'}){ 89 $self->close_xslt_pipe(); 97 if (!$self->{'debug'}) { 98 if (defined $self->{'xslt_writer'}){ 99 $self->close_xslt_pipe(); 100 } 101 else { 102 close($outhandler); 103 } 104 105 $self->{'short_doc_file'} = util::filename_cat ($doc_dir, "doc.xml"); 106 107 $self->store_output_info_reference($doc_obj); 90 108 } 91 else{ 92 close($outhandler); 93 } 109 } 94 110 95 $self->{'short_doc_file'} = util::filename_cat ($doc_dir, "doc.xml"); 96 97 $self->store_output_info_reference($doc_obj); 98 } 111 99 112 100 113 1; -
trunk/gsdl/perllib/plugouts/MARCXMLPlugout.pm
r12605 r13172 32 32 use util; 33 33 use BasPlugout; 34 use docprint; # for escape_text 35 34 36 35 37 sub BEGIN { … … 117 119 118 120 $self->output_xml_header($outhandler,"MARCXML"); 119 print $outhandler $ doc_obj->get_top_metadata_list();121 print $outhandler $self->get_top_metadata_list($doc_obj); 120 122 $self->output_xml_footer($outhandler,"MARCXML"); 121 123 $self->close_xslt_pipe(); … … 123 125 } 124 126 127 # returns a xml element of the form <MetadataList><Metadata name="metadata-name">metadata_value</Metadata>...</MetadataList> 128 129 sub get_top_metadata_list { 130 131 my $self = shift (@_); 132 my ($doc_obj) = @_; 133 134 my @topmetadata =$doc_obj->get_all_metadata($doc_obj->get_top_section()); 135 my $metadatalist ='<MetadataList>'; 136 137 foreach my $i (@topmetadata){ 138 foreach my $j (@$i){ 139 my %metaMap = @$j; 140 foreach my $key (keys %metaMap){ 141 $metadatalist .='<Metadata name='."\"$key\"".'>'.&docprint::escape_text($metaMap{$key}).'</Metadata>'."\n"; 142 } 143 } 144 } 145 146 $metadatalist .='</MetadataList>'; 147 return $metadatalist; 148 } 125 149 126 150 -
trunk/gsdl/perllib/plugouts/METSPlugout.pm
r13051 r13172 32 32 use util; 33 33 use BasPlugout; 34 use docprint; # for escape_text 34 35 35 36 sub BEGIN { … … 218 219 219 220 my $all_text = "<Section>\n"; 220 $all_text .= $doc_obj->_escape_text("$section_ptr->{'text'}");221 $all_text .= &docprint::escape_text("$section_ptr->{'text'}"); 221 222 222 223 #output all the subsections … … 326 327 $all_text .= " <oai_dc:dc $dc_namespace>\n"; 327 328 328 $all_text .= $ doc_obj->buffer_dc_section($section,"oai_dc");329 $all_text .= $self->get_dc_metadata($doc_obj, $section,"oai_dc"); 329 330 $all_text .= " </oai_dc:dc>\n"; 330 331 } 331 332 else { 332 333 foreach my $data (@{$section_ptr->{'metadata'}}){ 333 my $escaped_value = $doc_obj->_escape_text($data->[1]);334 my $escaped_value = &docprint::escape_text($data->[1]); 334 335 $all_text .= ' <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n"; 335 336 if ($data->[0] eq "dc.Title") { … … 420 421 421 422 foreach my $data (@{$section_ptr->{'metadata'}}){ 422 my $escaped_value = $doc_obj->_escape_text($data->[1]);423 my $escaped_value = &docprint::escape_text($data->[1]); 423 424 424 425 if ($data->[0] eq "gsdlsourcefilename") { … … 513 514 514 515 foreach my $data (@{$section_ptr->{'metadata'}}){ 515 my $escaped_value = $doc_obj->_escape_text($data->[1]);516 my $escaped_value = &docprint::escape_text($data->[1]); 516 517 517 518 if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
Note:
See TracChangeset
for help on using the changeset viewer.