Changeset 14927 for gsdl/trunk/perllib/plugouts/METSPlugout.pm
- Timestamp:
- 2007-12-20T17:45:11+13:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugouts/METSPlugout.pm
r13172 r14927 27 27 28 28 use strict; 29 no strict 'subs'; 29 30 no strict 'refs'; 31 32 use gsprintf 'gsprintf'; 30 33 31 34 eval {require bytes}; … … 39 42 40 43 my $arguments = [ 41 { 'name' => "saveas_version",42 'desc' => "{METSPlugout.version}",43 'type' => "string",44 'deft' => 'greenstone',45 'reqd' => "yes",46 'hiddengli' => "no"},47 44 { 'name' => "xslt_txt", 48 45 'desc' => "{METSPlugout.xslt_txt}", … … 59 56 my $options = { 'name' => "METSPlugout", 60 57 'desc' => "{METSPlugout.desc}", 61 'abstract' => " no",58 'abstract' => "yes", 62 59 'inherits' => "yes", 63 60 'args' => $arguments … … 75 72 my $self = (defined $hashArgOptLists)? new BasPlugout($plugoutlist,$inputargs,$hashArgOptLists): new BasPlugout($plugoutlist,$inputargs); 76 73 77 78 74 79 75 return bless $self, $class; 80 76 } 81 77 82 sub saveas { 78 79 sub saveas_doctxt 80 { 83 81 my $self = shift (@_); 84 my ($doc_obj,$doc_dir) = @_; 85 my $version = $self->{'saveas_version'}; 86 87 $self->process_assoc_files ($doc_obj, $doc_dir, ''); 88 89 my $output_dir = $self->get_output_dir(); 90 &util::mk_all_dir ($output_dir) unless -e $output_dir; 91 92 my $working_dir = &util::filename_cat ($output_dir, $doc_dir); 93 94 &util::mk_all_dir ($working_dir) unless -e $working_dir; 95 96 ######################### 97 # save the text file 98 ######################### 82 my ($doc_obj,$working_dir) = @_; 83 84 my $is_recursive = 1; 85 99 86 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt.xml"); 100 87 … … 111 98 112 99 $self->output_xml_header($outhandler); 113 $self->output_txt_section($outhandler,$doc_obj, $doc_obj->get_top_section()); 100 my $section = $doc_obj->get_top_section(); 101 $self->output_txt_section($outhandler,$doc_obj, $section, $is_recursive); 114 102 $self->output_xml_footer($outhandler); 115 103 … … 121 109 close($outhandler); 122 110 } 123 124 ######################### 125 # save the mets file 126 ######################### 111 112 } 113 114 sub saveas_docmets 115 { 116 my $self = shift (@_); 117 my ($doc_obj,$working_dir) = @_; 118 127 119 my $doc_mets_file = &util::filename_cat ($working_dir, "docmets.xml"); 128 120 … … 134 126 $self->open_xslt_pipe($doc_mets_file,$self->{'xslt_mets'}); 135 127 128 my $outhandler; 129 136 130 if (defined $self->{'xslt_writer'}){ 137 131 $outhandler = $self->{'xslt_writer'}; … … 143 137 144 138 $self->output_mets_xml_header($outhandler, $doc_obj->get_OID(), $doc_title); 145 $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(),$ version,$working_dir);139 $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(),$working_dir); 146 140 $self->output_mets_xml_footer($outhandler); 147 141 … … 152 146 close($outhandler); 153 147 } 154 148 149 150 } 151 152 sub saveas 153 { 154 my $self = shift (@_); 155 my ($doc_obj,$doc_dir) = @_; 156 157 $self->process_assoc_files ($doc_obj, $doc_dir, ''); 158 159 my $output_dir = $self->get_output_dir(); 160 &util::mk_all_dir ($output_dir) unless -e $output_dir; 161 162 my $working_dir = &util::filename_cat ($output_dir, $doc_dir); 163 164 &util::mk_all_dir ($working_dir) unless -e $working_dir; 165 166 ### 167 # Save the text as a filefile 168 ### 169 $self->saveas_doctxt($doc_obj,$working_dir); 170 171 ### 172 # Save the structure and metadata as a METS file 173 ### 174 $self->saveas_docmets($doc_obj,$working_dir); 175 155 176 $self->{'short_doc_file'} = &util::filename_cat ($doc_dir, "docmets.xml"); 156 177 157 $self->store_output_info_reference($doc_obj); 158 159 } 160 161 162 sub output_mets_xml_header(){ 178 $self->store_output_info_reference($doc_obj); 179 180 } 181 182 183 sub output_mets_xml_header 184 { 163 185 my $self = shift(@_); 164 186 my ($handle, $OID, $doc_title) = @_; 165 187 166 my $version = $self->{'saveas_version'}; 167 168 my $extra_attr = ""; 169 if ($version eq "fedora") { 170 my $fnamespace = $ENV{'FEDORA_PID_NAMESPACE'}; 171 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test"; 172 173 $extra_attr = "OBJID=\"$oid_namespace:$OID\" TYPE=\"FedoraObject\" LABEL=\"$doc_title\""; 174 } 175 else { 176 # Greenstone METS profile 177 $extra_attr = "OBJID=\"$OID:2\""; 178 } 179 188 gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n") && die "\n"; 189 } 190 191 sub output_mets_xml_header_extra_attribute 192 { 193 my $self = shift(@_); 194 my ($handle, $extra_attr) = @_; 180 195 181 196 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n"; … … 188 203 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n"; 189 204 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n"; 205 190 206 print $handle " $extra_attr>\n"; 191 207 192 if ($version eq "fedora") { 193 print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active 194 } 195 196 } 197 198 sub output_mets_xml_footer() { 208 } 209 210 sub output_mets_xml_footer 211 { 199 212 my $self = shift(@_); 200 213 my ($handle) = @_; … … 205 218 sub output_txt_section { 206 219 my $self = shift (@_); 207 my ($handle, $doc_obj, $section ) = @_;208 209 print $handle $self->buffer_txt_section_xml($doc_obj, $section );220 my ($handle, $doc_obj, $section, $is_recursive) = @_; 221 222 print $handle $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive); 210 223 } 211 224 212 225 sub buffer_txt_section_xml { 213 226 my $self = shift(@_); 214 my ($doc_obj, $section ) = @_;227 my ($doc_obj, $section, $is_recursive) = @_; 215 228 216 229 my $section_ptr = $doc_obj->_lookup_section ($section); … … 221 234 $all_text .= &docprint::escape_text("$section_ptr->{'text'}"); 222 235 223 #output all the subsections 224 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 225 $all_text .= $self->buffer_txt_section_xml($doc_obj, "$section.$subsection"); 226 } 236 if (defined $is_recursive && $is_recursive) 237 { 238 # Output all the subsections 239 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 240 $all_text .= $self->buffer_txt_section_xml($doc_obj, "$section.$subsection"); 241 } 242 } 227 243 228 244 $all_text .= "</Section>\n"; … … 233 249 } 234 250 235 # print out docmets.xml file 236 sub output_mets_section { 237 my $self = shift(@_); 238 my ($handle, $doc_obj, $section, $version, $working_dir) = @_; 239 240 # print out the dmdSection 241 print $handle $self->buffer_mets_dmdSection_section_xml($doc_obj,$section, $version); 242 243 print $handle "<mets:fileSec>\n"; 244 if ($version eq "fedora") { 245 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n"; 246 } 247 248 # print out the fileSection by sections 249 print $handle $self->buffer_mets_fileSection_section_xml($doc_obj,$section,$version); 250 251 # print out the whole fileSection 252 print $handle $self->buffer_mets_fileWhole_section_xml($doc_obj,$section,$version,$working_dir); 253 254 if ($version eq "fedora") { 255 print $handle " </mets:fileGrp>\n"; 256 } 257 print $handle "</mets:fileSec>\n"; 258 259 # print out the StructMapSection by sections 260 261 my $struct_type; 262 if ($version eq "fedora") { 263 $struct_type = "fedora:dsBindingMap"; 264 } 265 else { 266 $struct_type = "Section"; 267 } 268 269 if ($version ne "fedora") { 270 print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n"; 271 my $order_num=0; 272 print $handle $self->buffer_mets_StructMapSection_section_xml($doc_obj,$section, \$order_num); 273 print $handle "</mets:structMap>\n"; 274 275 print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n"; 276 print $handle $self->buffer_mets_StructMapWhole_section_xml($doc_obj,$section); 277 print $handle "</mets:structMap>\n"; 278 } 279 280 } 281 282 sub buffer_mets_dmdSection_section_xml(){ 283 my $self = shift(@_); 284 my ($doc_obj,$section,$version) = @_; 285 251 # 252 # Print out docmets.xml file 253 # 254 sub output_mets_section 255 { 256 my $self = shift(@_); 257 my ($handle, $doc_obj, $section, $working_dir) = @_; 258 259 gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n") && die "\n"; 260 261 } 262 263 264 sub buffer_mets_dmdSection_section_xml 265 { 266 my $self = shift(@_); 267 my ($doc_obj,$section) = @_; 268 269 gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n") && die "\n"; 270 } 271 272 sub buffer_mets_StructMapSection_section_xml 273 { 274 my $self = shift(@_); 275 my ($doc_obj,$section, $order_numref, $fileid_base) = @_; 276 286 277 $section="" unless defined $section; 278 287 279 288 280 my $section_ptr=$doc_obj->_lookup_section($section); 289 281 return "" unless defined $section_ptr; 290 282 291 # convert section number 292 my $section_num ="1". $section; 293 my $dmd_num = $section_num; 294 295 # #**output the dmdSection details 296 # if ($section_num eq "1") { 297 # $dmd_num = "0"; 298 # } 299 300 301 my $all_text = ""; 302 303 my $label_attr = ""; 304 if ($version eq "fedora") { 305 $all_text .= "<mets:amdSec ID=\"DC\" >\n"; 306 $all_text .= " <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number? 307 308 $label_attr = "LABEL=\"Dublin Core Metadata\""; 309 } 310 else { 311 # TODO:: 312 #print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n"; 313 #print STDERR "***** Check to see if <techMD> required\n"; 314 # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec 315 316 $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n"; 317 } 318 319 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n"; 320 $all_text .= " <mets:xmlData>\n"; 321 322 if ($version eq "fedora") { 323 my $dc_namespace = ""; 324 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\""; 325 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n"; 326 327 $all_text .= " <oai_dc:dc $dc_namespace>\n"; 328 329 $all_text .= $self->get_dc_metadata($doc_obj, $section,"oai_dc"); 330 $all_text .= " </oai_dc:dc>\n"; 331 } 332 else { 333 foreach my $data (@{$section_ptr->{'metadata'}}){ 334 my $escaped_value = &docprint::escape_text($data->[1]); 335 $all_text .= ' <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n"; 336 if ($data->[0] eq "dc.Title") { 337 $all_text .= ' <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n"; 338 } 339 } 340 } 341 342 $all_text .= " </mets:xmlData>\n"; 343 $all_text .= " </mets:mdWrap>\n"; 344 345 if ($version eq "fedora") { 346 $all_text .= " </mets:techMD>\n"; 347 $all_text .= "</mets:amdSec>\n"; 348 } 349 else { 350 $all_text .= "</mets:dmdSec>\n"; 351 } 352 353 354 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 355 $all_text .= $self->buffer_mets_dmdSection_section_xml($doc_obj,"$section.$subsection",$version); 356 } 357 358 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 359 360 return $all_text; 361 } 362 363 sub buffer_mets_StructMapSection_section_xml(){ 364 my $self = shift(@_); 365 my ($doc_obj,$section, $order_numref) = @_; 366 367 $section="" unless defined $section; 368 369 370 my $section_ptr=$doc_obj->_lookup_section($section); 371 return "" unless defined $section_ptr; 372 283 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base; 373 284 374 285 # output fileSection by Sections 375 286 my $section_num ="1". $section; 376 287 my $dmd_num = $section_num; 377 378 ##**output the dmdSection details379 #if ($section_num eq "1") {380 # $dmd_num = "0";381 #}382 288 383 289 #**output the StructMap details … … 389 295 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n"; 390 296 391 $all_text .= ' <mets:fptr FILEID=" FILEGROUP_PRELUDE'.$section_num.'" />'. "\n";297 $all_text .= ' <mets:fptr FILEID="'.$fileid_base.$section_num.'" />'. "\n"; 392 298 393 299 394 300 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 395 $all_text .= $self->buffer_mets_StructMapSection_section_xml($doc_obj,"$section.$subsection", $order_numref );301 $all_text .= $self->buffer_mets_StructMapSection_section_xml($doc_obj,"$section.$subsection", $order_numref, $fileid_base); 396 302 } 397 303 … … 404 310 405 311 406 sub buffer_mets_StructMapWhole_section_xml(){ 312 sub buffer_mets_StructMapWhole_section_xml 313 { 407 314 my $self = shift(@_); 408 315 my ($doc_obj,$section) = @_; … … 440 347 } 441 348 442 sub buffer_mets_fileSection_section_xml() { 443 my $self = shift(@_); 444 my ($doc_obj,$section,$version) = @_; 349 350 351 sub doctxt_to_xlink 352 { 353 my $self = shift @_; 354 my ($fname,$working_dir) = @_; 355 356 gsprintf(STDERR, "METSPlugout::doxtxt_to_xlink {common.must_be_implemented}\n") && die "\n"; 357 } 358 359 sub buffer_mets_fileSection_section_xml 360 { 361 my $self = shift(@_); 362 my ($doc_obj,$section,$working_dir, $is_txt_split,$opt_attr,$fileid_base) = @_; 445 363 446 364 #$section="" unless defined $section; 447 365 448 449 366 my $section_ptr=$doc_obj->_lookup_section($section); 450 367 return "" unless defined $section_ptr; 451 368 369 $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base; 452 370 453 371 # output fileSection by sections 454 372 my $section_num ="1". $section; 455 456 my $filePath = 'doctxt.xml'; 457 458 my $opt_owner_id = ""; 459 if ($version eq "fedora") { 460 $opt_owner_id = "OWNERID=\"M\""; 461 } 462 373 374 $opt_attr = "" unless defined $opt_attr; 375 463 376 # output the fileSection details 464 my $all_text = ' <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n"; 465 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n"; 466 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section['; 467 468 my $xpath = "1".$section; 469 $xpath =~ s/\./]\/Section[/g; 470 471 $all_text .= $xpath; 472 473 $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n"; 377 my $all_text = ' <mets:fileGrp ID="'.$fileid_base.$section_num . '">'. "\n"; 378 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_attr >\n"; 379 380 my $xlink; 381 if (defined $is_txt_split && $is_txt_split) 382 { 383 my $section_fnum ="1". $section; 384 $section_fnum =~ s/\./_/g; 385 386 $xlink = $self->doctxt_to_xlink("doctxt$section_fnum.xml",$working_dir); 387 } 388 else 389 { 390 $xlink = $self->doctxt_to_xlink("doctxt.xml",$working_dir); 391 392 $xlink .= '#xpointer(/Section['; 393 394 my $xpath = "1".$section; 395 $xpath =~ s/\./\]\/Section\[/g; 396 397 $xlink .= $xpath; 398 399 $xlink .= ']/text())'; 400 } 401 402 403 404 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"'; 405 406 $all_text .= ' xlink:title="Hierarchical Document Structure"/>' . "\n"; 474 407 $all_text .= " </mets:file>\n"; 475 408 $all_text .= " </mets:fileGrp>\n"; … … 477 410 478 411 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 479 $all_text .= $self->buffer_mets_fileSection_section_xml($doc_obj,"$section.$subsection",$ version);412 $all_text .= $self->buffer_mets_fileSection_section_xml($doc_obj,"$section.$subsection",$working_dir, $is_txt_split, $opt_attr, $fileid_base); 480 413 } 481 414 … … 485 418 } 486 419 487 sub buffer_mets_fileWhole_section_xml(){ 488 my $self = shift(@_); 489 my ($doc_obj,$section,$version,$working_dir) = @_; 490 491 my $section_ptr = $doc_obj-> _lookup_section($section); 492 return "" unless defined $section_ptr; 493 494 my $all_text=""; 495 496 my $fileID=0; 497 498 # Output the fileSection for the whole section 499 # => get the sourcefile and associative file 500 501 my $id_root = ""; 502 my $opt_owner_id = ""; 503 504 if ($version eq "fedora") { 505 $opt_owner_id = "OWNERID=\"M\""; 506 } 507 else { 508 $id_root = "default"; 509 } 510 511 if ($version ne "fedora") { 512 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n"; 513 } 514 515 foreach my $data (@{$section_ptr->{'metadata'}}){ 516 my $escaped_value = &docprint::escape_text($data->[1]); 517 518 if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) { 519 my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/; 520 521 ++$fileID; 522 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n"; 523 524 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n"; 525 526 $all_text .= " </mets:file>\n"; 527 } 528 529 if ($data->[0] eq "gsdlassocfile"){ 530 531 $escaped_value =~ m/^(.*?):(.*):(.*)$/; 532 my $assoc_file = $1; 533 my $mime_type = $2; 534 my $assoc_dir = $3; 535 536 if ($version eq "fedora") { 537 $id_root = $assoc_file; 538 $id_root =~ s/\//_/g; 539 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n"; 540 } 541 542 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file"; 543 ++$fileID; 544 545 my $mime_attr = "MIMETYPE=\"$mime_type\""; 546 my $xlink_title = "xlink:title=\"$assoc_file\""; 547 548 my $id_attr; 549 my $xlink_href; 550 551 if ($version eq "fedora") { 552 $id_attr = "ID=\"$id_root.0\""; 553 554 my $fedora_prefix = $ENV{'FEDORA_PREFIX'}; 555 if (!defined $fedora_prefix) { 556 $xlink_href = "xlink:href=\"$assfilePath\""; 557 } 558 else 559 { 560 my $gsdlhome = $ENV{'GSDLHOME'}; 561 my $gsdl_href = "$working_dir/$assfilePath"; 562 563 $gsdl_href =~ s/^$gsdlhome(\/)?//; 564 $gsdl_href = "/gsdl/$gsdl_href"; 565 566 my $fserver = $ENV{'FEDORA_HOSTNAME'}; 567 my $fport = $ENV{'FEDORA_SERVER_PORT'}; 568 569 my $fdomain = "http://$fserver:$fport"; 570 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\""; 571 } 572 573 my $top_section = $doc_obj->get_top_section(); 574 my $id = $doc_obj->get_metadata_element($top_section,"Identifier"); 575 } 576 else { 577 $id_attr = "ID=\"$id_root.$fileID\""; 578 $xlink_href = "xlink:href=\"$assfilePath\""; 579 } 580 581 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n"; 582 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n"; 583 584 $all_text .= " </mets:file>\n"; 585 586 if ($version eq "fedora") { 587 $all_text .= " </mets:fileGrp>\n"; 588 } 589 590 } 591 } 592 593 if ($version ne "fedora") { 594 $all_text .= " </mets:fileGrp>\n"; 595 } 596 597 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 598 599 return $all_text; 600 } 601 420 sub buffer_mets_fileWhole_section_xml 421 { 422 my $self = shift(@_); 423 my ($doc_obj,$section,$working_dir) = @_; 424 425 gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n") && die "\n"; 426 427 } 602 428 603 429 1;
Note:
See TracChangeset
for help on using the changeset viewer.