Changeset 13050
- Timestamp:
- 2006-10-09T10:38:57+13:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/doc.pm
r13045 r13050 224 224 225 225 sub _escape_text { 226 my $self = shift (@_); 226 227 my ($text) = @_; 227 228 # special characters in the gml encoding … … 246 247 # output metadata 247 248 foreach my $data (@{$section_ptr->{'metadata'}}) { 248 my $escaped_value = &_escape_text($data->[1]);249 my $escaped_value = $self->_escape_text($data->[1]); 249 250 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n"; 250 251 } … … 254 255 # output the text 255 256 $all_text .= " <Content>"; 256 $all_text .= &_escape_text($section_ptr->{'text'});257 $all_text .= $self->_escape_text($section_ptr->{'text'}); 257 258 $all_text .= "</Content>\n"; 258 259 … … 272 273 } 273 274 274 sub buffer_txt_section_xml { 275 my $self = shift(@_); 276 my ($section) = @_; 277 278 my $section_ptr = $self->_lookup_section ($section); 279 280 return "" unless defined $section_ptr; 281 282 my $all_text = "<Section>\n"; 283 284 ##output the text 285 #$all_text .= " <Content>"; 286 $all_text .= &_escape_text($section_ptr->{'text'}); 287 #$all_text .= " </Content>\n"; 288 289 290 #output all the subsections 291 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 292 $all_text .= $self->buffer_txt_section_xml("$section.$subsection"); 293 } 294 295 $all_text .= "</Section>\n"; 296 297 298 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 299 return $all_text; 300 } 301 302 sub buffer_mets_fileSection_section_xml() { 303 my $self = shift(@_); 304 my ($section,$version) = @_; 305 306 #$section="" unless defined $section; 307 308 309 my $section_ptr=$self->_lookup_section($section); 310 return "" unless defined $section_ptr; 311 312 313 # output fileSection by sections 314 my $section_num ="1". $section; 315 316 my $filePath = 'doctxt.xml'; 317 318 my $opt_owner_id = ""; 319 if ($version eq "fedora") { 320 $opt_owner_id = "OWNERID=\"M\""; 321 } 322 323 # output the fileSection details 324 my $all_text = ' <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n"; 325 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n"; 326 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section['; 327 328 my $xpath = "1".$section; 329 $xpath =~ s/\./]\/Section[/g; 330 331 $all_text .= $xpath; 332 333 $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n"; 334 $all_text .= " </mets:file>\n"; 335 $all_text .= " </mets:fileGrp>\n"; 336 337 338 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 339 $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection",$version); 340 } 341 342 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 343 344 return $all_text; 345 } 346 347 sub buffer_mets_fileWhole_section_xml(){ 348 my $self = shift(@_); 349 my ($section,$version,$working_dir) = @_; 350 351 my $section_ptr = $self-> _lookup_section($section); 352 return "" unless defined $section_ptr; 353 354 my $all_text="" unless defined $all_txt; 355 356 my $fileID=0; 357 358 # Output the fileSection for the whole section 359 # => get the sourcefile and associative file 360 361 my $id_root = ""; 362 my $opt_owner_id = ""; 363 364 if ($version eq "fedora") { 365 $opt_owner_id = "OWNERID=\"M\""; 366 } 367 else { 368 $id_root = "default"; 369 } 370 371 if ($version ne "fedora") { 372 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n"; 373 } 374 375 foreach my $data (@{$section_ptr->{'metadata'}}){ 376 my $escaped_value = &_escape_text($data->[1]); 377 378 if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) { 379 my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/; 380 381 ++$fileID; 382 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n"; 383 384 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n"; 385 386 $all_text .= " </mets:file>\n"; 387 } 388 389 if ($data->[0] eq "gsdlassocfile"){ 390 391 $escaped_value =~ m/^(.*?):(.*):(.*)$/; 392 my $assoc_file = $1; 393 my $mime_type = $2; 394 my $assoc_dir = $3; 395 396 if ($version eq "fedora") { 397 $id_root = $assoc_file; 398 $id_root =~ s/\//_/g; 399 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n"; 400 } 401 402 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file"; 403 ++$fileID; 404 405 my $mime_attr = "MIMETYPE=\"$mime_type\""; 406 my $xlink_title = "xlink:title=\"$assoc_file\""; 407 408 my $id_attr; 409 my $xlink_href; 410 411 if ($version eq "fedora") { 412 $id_attr = "ID=\"$id_root.0\""; 413 414 my $fedora_prefix = $ENV{'FEDORA_PREFIX'}; 415 if (!defined $fedora_prefix) { 416 $xlink_href = "xlink:href=\"$assfilePath\""; 417 } 418 else 419 { 420 my $gsdlhome = $ENV{'GSDLHOME'}; 421 my $gsdl_href = "$working_dir/$assfilePath"; 422 423 $gsdl_href =~ s/^$gsdlhome(\/)?//; 424 $gsdl_href = "/gsdl/$gsdl_href"; 425 426 my $fserver = $ENV{'FEDORA_HOSTNAME'}; 427 my $fport = $ENV{'FEDORA_SERVER_PORT'}; 428 429 my $fdomain = "http://$fserver:$fport"; 430 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\""; 431 } 432 433 my $top_section = $self->get_top_section(); 434 my $id = $self->get_metadata_element($top_section,"Identifier"); 435 } 436 else { 437 $id_attr = "ID=\"$id_root.$fileID\""; 438 $xlink_href = "xlink:href=\"$assfilePath\""; 439 } 440 441 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n"; 442 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n"; 443 444 $all_text .= " </mets:file>\n"; 445 446 if ($version eq "fedora") { 447 $all_text .= " </mets:fileGrp>\n"; 448 } 449 450 } 451 } 452 453 if ($version ne "fedora") { 454 $all_text .= " </mets:fileGrp>\n"; 455 } 456 457 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 458 459 return $all_text; 460 } 461 462 sub buffer_mets_StructMapSection_section_xml(){ 463 my $self = shift(@_); 464 my ($section, $order_numref) = @_; 465 466 $section="" unless defined $section; 467 468 469 my $section_ptr=$self->_lookup_section($section); 470 return "" unless defined $section_ptr; 471 472 473 # output fileSection by Sections 474 my $section_num ="1". $section; 475 my $dmd_num = $section_num; 476 477 ##**output the dmdSection details 478 #if ($section_num eq "1") { 479 # $dmd_num = "0"; 480 #} 481 482 #**output the StructMap details 483 484 my $dmdid_attr = "DM$dmd_num"; 485 486 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n"; 487 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n"; 488 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n"; 489 490 $all_text .= ' <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n"; 491 492 493 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 494 $all_text .= $self->buffer_mets_StructMapSection_section_xml("$section.$subsection", $order_numref); 495 } 496 497 $all_text .= " </mets:div>\n"; 498 499 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 500 501 return $all_text; 502 } 503 504 505 sub buffer_mets_StructMapWhole_section_xml(){ 506 my $self = shift(@_); 507 my ($section) = @_; 508 509 my $section_ptr = $self-> _lookup_section($section); 510 return "" unless defined $section_ptr; 511 512 my $all_text="" unless defined $all_txt; 513 my $fileID=0; 514 my $order_num = 0; 515 516 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n"; 517 518 #** output the StructMapSection for the whole section 519 # get the sourcefile and associative file 520 521 foreach my $data (@{$section_ptr->{'metadata'}}){ 522 my $escaped_value = &_escape_text($data->[1]); 523 524 if ($data->[0] eq "gsdlsourcefilename") { 525 ++$fileID; 526 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n"; 527 } 528 529 if ($data->[0] eq "gsdlassocfile"){ 530 ++$fileID; 531 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n"; 532 } 533 } 534 $all_text .= " </mets:div>\n"; 535 536 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 537 538 return $all_text; 539 } 540 541 542 sub buffer_mets_dmdSection_section_xml(){ 543 my $self = shift(@_); 544 my ($section,$version) = @_; 545 546 $section="" unless defined $section; 547 548 my $section_ptr=$self->_lookup_section($section); 549 return "" unless defined $section_ptr; 550 551 # convert section number 552 my $section_num ="1". $section; 553 my $dmd_num = $section_num; 554 555 # #**output the dmdSection details 556 # if ($section_num eq "1") { 557 # $dmd_num = "0"; 558 # } 559 560 561 my $all_text = ""; 562 563 my $label_attr = ""; 564 if ($version eq "fedora") { 565 $all_text .= "<mets:amdSec ID=\"DC\" >\n"; 566 $all_text .= " <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number? 567 568 $label_attr = "LABEL=\"Dublin Core Metadata\""; 569 } 570 else { 571 # TODO:: 572 #print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n"; 573 #print STDERR "***** Check to see if <techMD> required\n"; 574 # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec 575 576 $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n"; 577 } 578 579 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n"; 580 $all_text .= " <mets:xmlData>\n"; 581 582 if ($version eq "fedora") { 583 my $dc_namespace = ""; 584 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\""; 585 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n"; 586 587 $all_text .= " <oai_dc:dc $dc_namespace>\n"; 588 589 $all_text .= $self->buffer_dc_section($section,"oai_dc"); 590 $all_text .= " </oai_dc:dc>\n"; 591 } 592 else { 593 foreach my $data (@{$section_ptr->{'metadata'}}){ 594 my $escaped_value = &_escape_text($data->[1]); 595 $all_text .= ' <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n"; 596 if ($data->[0] eq "dc.Title") { 597 $all_text .= ' <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n"; 598 } 599 } 600 } 601 602 $all_text .= " </mets:xmlData>\n"; 603 $all_text .= " </mets:mdWrap>\n"; 604 605 if ($version eq "fedora") { 606 $all_text .= " </mets:techMD>\n"; 607 $all_text .= "</mets:amdSec>\n"; 608 } 609 else { 610 $all_text .= "</mets:dmdSec>\n"; 611 } 612 613 614 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 615 $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection",$version); 616 } 617 618 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 619 620 return $all_text; 621 } 275 276 622 277 623 278 sub output_section { … … 626 281 627 282 print $handle $self->buffer_section_xml($section); 628 }629 630 # print out DSpace dublin_core metadata section631 sub output_dspace_section {632 my $self = shift (@_);633 my ($handle, $section) = @_;634 635 my $section_ptr = $self->_lookup_section ($section);636 return "" unless defined $section_ptr;637 638 my $all_text = "<Section>\n";639 $all_text .= " <Description>\n";640 641 # output metadata642 foreach my $data (@{$section_ptr->{'metadata'}}) {643 my $escaped_value = &_escape_text($data->[1]);644 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";645 }646 647 $all_text .= " </Description>\n";648 $all_text .= "</Section>\n";649 650 # make sure no nasty control characters have snuck through651 # (XML::Parser will barf on anything it doesn't consider to be652 # valid UTF-8 text, including things like \c@, \cC etc.)653 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;654 655 return $all_text;656 }657 658 # print out doctxt.xml file659 sub output_txt_section {660 my $self = shift (@_);661 my ($handle, $section) = @_;662 663 print $handle $self->buffer_txt_section_xml($section);664 }665 666 # print out docmets.xml file667 sub output_mets_section {668 my $self = shift(@_);669 my ($handle, $section, $version, $working_dir) = @_;670 671 # print out the dmdSection672 print $handle $self->buffer_mets_dmdSection_section_xml($section, $version);673 674 print $handle "<mets:fileSec>\n";675 if ($version eq "fedora") {676 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";677 }678 679 # print out the fileSection by sections680 print $handle $self->buffer_mets_fileSection_section_xml($section,$version);681 682 # print out the whole fileSection683 print $handle $self->buffer_mets_fileWhole_section_xml($section,$version,$working_dir);684 685 if ($version eq "fedora") {686 print $handle " </mets:fileGrp>\n";687 }688 print $handle "</mets:fileSec>\n";689 690 # print out the StructMapSection by sections691 692 my $struct_type;693 if ($version eq "fedora") {694 $struct_type = "fedora:dsBindingMap";695 }696 else {697 $struct_type = "Section";698 }699 700 if ($version ne "fedora") {701 print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n";702 my $order_num=0;703 print $handle $self->buffer_mets_StructMapSection_section_xml($section, \$order_num);704 print $handle "</mets:structMap>\n";705 706 print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";707 print $handle $self->buffer_mets_StructMapWhole_section_xml($section);708 print $handle "</mets:structMap>\n";709 }710 711 283 } 712 284 … … 744 316 return "" unless defined $section_ptr; 745 317 foreach my $data (@{$section_ptr->{'metadata'}}){ 746 my $escaped_value = &_escape_text($data->[1]);318 my $escaped_value = $self->_escape_text($data->[1]); 747 319 my $dc_element = $data->[0]; 748 320 … … 788 360 } 789 361 print "\n"; 790 my $escaped_value = &_escape_text($data->[1]);362 my $escaped_value = $self->_escape_text($data->[1]); 791 363 if ($data->[0]=~ m/^dc\./) { 792 364 $data->[0] =~ tr/[A-Z]/[a-z]/; … … 1415 987 my %metaMap = @$j; 1416 988 foreach my $key (keys %metaMap){ 1417 $metadatalist .='<Metadata name='."\"$key\"".'>'. &_escape_text($metaMap{$key}).'</Metadata>'."\n";989 $metadatalist .='<Metadata name='."\"$key\"".'>'.$self->_escape_text($metaMap{$key}).'</Metadata>'."\n"; 1418 990 } 1419 991 }
Note:
See TracChangeset
for help on using the changeset viewer.