[12330] | 1 | ###########################################################################
|
---|
| 2 | #
|
---|
| 3 | # METSPlugout.pm -- the plugout module for METS archives
|
---|
| 4 | # A component of the Greenstone digital library software
|
---|
| 5 | # from the New Zealand Digital Library Project at the
|
---|
| 6 | # University of Waikato, New Zealand.
|
---|
| 7 | #
|
---|
| 8 | # Copyright (C) 2006 New Zealand Digital Library Project
|
---|
| 9 | #
|
---|
| 10 | # This program is free software; you can redistribute it and/or modify
|
---|
| 11 | # it under the terms of the GNU General Public License as published by
|
---|
| 12 | # the Free Software Foundation; either version 2 of the License, or
|
---|
| 13 | # (at your option) any later version.
|
---|
| 14 | #
|
---|
| 15 | # This program is distributed in the hope that it will be useful,
|
---|
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 18 | # GNU General Public License for more details.
|
---|
| 19 | #
|
---|
| 20 | # You should have received a copy of the GNU General Public License
|
---|
| 21 | # along with this program; if not, write to the Free Software
|
---|
| 22 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 23 | #
|
---|
| 24 | ###########################################################################
|
---|
| 25 |
|
---|
| 26 | package METSPlugout;
|
---|
| 27 |
|
---|
| 28 | use strict;
|
---|
[14927] | 29 | no strict 'subs';
|
---|
[12330] | 30 | no strict 'refs';
|
---|
| 31 |
|
---|
[14927] | 32 | use gsprintf 'gsprintf';
|
---|
| 33 |
|
---|
[12330] | 34 | eval {require bytes};
|
---|
| 35 | use util;
|
---|
| 36 | use BasPlugout;
|
---|
[13172] | 37 | use docprint; # for escape_text
|
---|
[12330] | 38 |
|
---|
| 39 | sub BEGIN {
|
---|
| 40 | @METSPlugout::ISA = ('BasPlugout');
|
---|
| 41 | }
|
---|
| 42 |
|
---|
| 43 | my $arguments = [
|
---|
| 44 | { 'name' => "xslt_txt",
|
---|
[12693] | 45 | 'desc' => "{METSPlugout.xslt_txt}",
|
---|
[12330] | 46 | 'type' => "string",
|
---|
| 47 | 'reqd' => "no",
|
---|
| 48 | 'hiddengli' => "no"},
|
---|
| 49 | { 'name' => "xslt_mets",
|
---|
[12693] | 50 | 'desc' => "{METSPlugout.xslt_mets}",
|
---|
[12330] | 51 | 'type' => "string",
|
---|
| 52 | 'reqd' => "no",
|
---|
| 53 | 'hiddengli' => "no"}
|
---|
| 54 | ];
|
---|
| 55 |
|
---|
| 56 | my $options = { 'name' => "METSPlugout",
|
---|
| 57 | 'desc' => "{METSPlugout.desc}",
|
---|
[14927] | 58 | 'abstract' => "yes",
|
---|
[12330] | 59 | 'inherits' => "yes",
|
---|
| 60 | 'args' => $arguments
|
---|
| 61 | };
|
---|
| 62 |
|
---|
| 63 | sub new {
|
---|
| 64 | my ($class) = shift (@_);
|
---|
| 65 | my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
|
---|
| 66 | push(@$plugoutlist, $class);
|
---|
| 67 |
|
---|
| 68 |
|
---|
| 69 | if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
|
---|
| 70 | if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
|
---|
| 71 |
|
---|
| 72 | my $self = (defined $hashArgOptLists)? new BasPlugout($plugoutlist,$inputargs,$hashArgOptLists): new BasPlugout($plugoutlist,$inputargs);
|
---|
| 73 |
|
---|
[14927] | 74 |
|
---|
[12330] | 75 | return bless $self, $class;
|
---|
| 76 | }
|
---|
| 77 |
|
---|
[14927] | 78 |
|
---|
| 79 | sub saveas_doctxt
|
---|
| 80 | {
|
---|
[12330] | 81 | my $self = shift (@_);
|
---|
[14927] | 82 | my ($doc_obj,$working_dir) = @_;
|
---|
[12330] | 83 |
|
---|
[14927] | 84 | my $is_recursive = 1;
|
---|
[12330] | 85 |
|
---|
| 86 | my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt.xml");
|
---|
| 87 |
|
---|
| 88 | $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
|
---|
| 89 |
|
---|
| 90 | my $outhandler;
|
---|
| 91 |
|
---|
| 92 | if (defined $self->{'xslt_writer'}){
|
---|
| 93 | $outhandler = $self->{'xslt_writer'};
|
---|
| 94 | }
|
---|
| 95 | else{
|
---|
| 96 | $outhandler = $self->get_output_handler($doc_txt_file);
|
---|
| 97 | }
|
---|
| 98 |
|
---|
| 99 | $self->output_xml_header($outhandler);
|
---|
[14927] | 100 | my $section = $doc_obj->get_top_section();
|
---|
| 101 | $self->output_txt_section($outhandler,$doc_obj, $section, $is_recursive);
|
---|
[12330] | 102 | $self->output_xml_footer($outhandler);
|
---|
| 103 |
|
---|
| 104 |
|
---|
| 105 | if (defined $self->{'xslt_writer'}){
|
---|
| 106 | $self->close_xslt_pipe();
|
---|
| 107 | }
|
---|
| 108 | else{
|
---|
| 109 | close($outhandler);
|
---|
| 110 | }
|
---|
[14927] | 111 |
|
---|
| 112 | }
|
---|
| 113 |
|
---|
| 114 | sub saveas_docmets
|
---|
| 115 | {
|
---|
| 116 | my $self = shift (@_);
|
---|
| 117 | my ($doc_obj,$working_dir) = @_;
|
---|
| 118 |
|
---|
[12330] | 119 | my $doc_mets_file = &util::filename_cat ($working_dir, "docmets.xml");
|
---|
| 120 |
|
---|
| 121 | my $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"dc.Title");
|
---|
| 122 | if (!defined $doc_title) {
|
---|
| 123 | $doc_title = $doc_obj->get_metadata_element($doc_obj->get_top_section(),"Title");
|
---|
| 124 | }
|
---|
| 125 |
|
---|
| 126 | $self->open_xslt_pipe($doc_mets_file,$self->{'xslt_mets'});
|
---|
| 127 |
|
---|
[14927] | 128 | my $outhandler;
|
---|
| 129 |
|
---|
[12330] | 130 | if (defined $self->{'xslt_writer'}){
|
---|
| 131 | $outhandler = $self->{'xslt_writer'};
|
---|
| 132 | }
|
---|
| 133 | else{
|
---|
| 134 | $outhandler = $self->get_output_handler($doc_mets_file);
|
---|
| 135 | }
|
---|
| 136 |
|
---|
| 137 |
|
---|
| 138 | $self->output_mets_xml_header($outhandler, $doc_obj->get_OID(), $doc_title);
|
---|
[14927] | 139 | $self->output_mets_section($outhandler, $doc_obj, $doc_obj->get_top_section(),$working_dir);
|
---|
[12330] | 140 | $self->output_mets_xml_footer($outhandler);
|
---|
| 141 |
|
---|
| 142 | if (defined $self->{'xslt_writer'}){
|
---|
| 143 | $self->close_xslt_pipe();
|
---|
| 144 | }
|
---|
| 145 | else{
|
---|
| 146 | close($outhandler);
|
---|
| 147 | }
|
---|
[14927] | 148 |
|
---|
| 149 |
|
---|
| 150 | }
|
---|
| 151 |
|
---|
| 152 | sub saveas
|
---|
| 153 | {
|
---|
| 154 | my $self = shift (@_);
|
---|
| 155 | my ($doc_obj,$doc_dir) = @_;
|
---|
| 156 |
|
---|
| 157 | $self->process_assoc_files ($doc_obj, $doc_dir, '');
|
---|
| 158 |
|
---|
| 159 | my $output_dir = $self->get_output_dir();
|
---|
| 160 | &util::mk_all_dir ($output_dir) unless -e $output_dir;
|
---|
| 161 |
|
---|
| 162 | my $working_dir = &util::filename_cat ($output_dir, $doc_dir);
|
---|
| 163 |
|
---|
| 164 | &util::mk_all_dir ($working_dir) unless -e $working_dir;
|
---|
| 165 |
|
---|
| 166 | ###
|
---|
| 167 | # Save the text as a filefile
|
---|
| 168 | ###
|
---|
| 169 | $self->saveas_doctxt($doc_obj,$working_dir);
|
---|
| 170 |
|
---|
| 171 | ###
|
---|
| 172 | # Save the structure and metadata as a METS file
|
---|
| 173 | ###
|
---|
| 174 | $self->saveas_docmets($doc_obj,$working_dir);
|
---|
| 175 |
|
---|
[12330] | 176 | $self->{'short_doc_file'} = &util::filename_cat ($doc_dir, "docmets.xml");
|
---|
| 177 |
|
---|
[14927] | 178 | $self->store_output_info_reference($doc_obj);
|
---|
| 179 |
|
---|
[12330] | 180 | }
|
---|
| 181 |
|
---|
| 182 |
|
---|
[14927] | 183 | sub output_mets_xml_header
|
---|
| 184 | {
|
---|
[12330] | 185 | my $self = shift(@_);
|
---|
| 186 | my ($handle, $OID, $doc_title) = @_;
|
---|
| 187 |
|
---|
[14927] | 188 | gsprintf(STDERR, "METSPlugout::output_mets_xml_header {common.must_be_implemented}\n") && die "\n";
|
---|
| 189 | }
|
---|
[12330] | 190 |
|
---|
[14927] | 191 | sub output_mets_xml_header_extra_attribute
|
---|
| 192 | {
|
---|
| 193 | my $self = shift(@_);
|
---|
| 194 | my ($handle, $extra_attr) = @_;
|
---|
[12330] | 195 |
|
---|
| 196 | print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n";
|
---|
| 197 | print $handle '<mets:mets xmlns:mets="http://www.loc.gov/METS/"' . "\n";
|
---|
| 198 | print $handle ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\n";
|
---|
| 199 | print $handle ' xmlns:gsdl3="http://www.greenstone.org/namespace/gsdlmetadata/1.0/"' . "\n";
|
---|
[14970] | 200 | ## print $handle ' xmlns:xlink="http://www.w3.org/TR/xlink"' ."\n";
|
---|
| 201 | print $handle ' xmlns:xlink="http://www.w3.org/1999/xlink"' ."\n";
|
---|
[12330] | 202 | print $handle ' xsi:schemaLocation="http://www.loc.gov/METS/' . "\n";
|
---|
| 203 | print $handle ' http://www.loc.gov/standards/mets/mets.xsd' . "\n";
|
---|
| 204 | print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n";
|
---|
| 205 | print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n";
|
---|
[14927] | 206 |
|
---|
[12330] | 207 | print $handle " $extra_attr>\n";
|
---|
| 208 |
|
---|
| 209 | }
|
---|
| 210 |
|
---|
[14927] | 211 | sub output_mets_xml_footer
|
---|
| 212 | {
|
---|
[12330] | 213 | my $self = shift(@_);
|
---|
| 214 | my ($handle) = @_;
|
---|
| 215 | print $handle '</mets:mets>' . "\n";
|
---|
| 216 | }
|
---|
| 217 |
|
---|
[13051] | 218 | # print out doctxt.xml file
|
---|
| 219 | sub output_txt_section {
|
---|
| 220 | my $self = shift (@_);
|
---|
[14927] | 221 | my ($handle, $doc_obj, $section, $is_recursive) = @_;
|
---|
[13051] | 222 |
|
---|
[14927] | 223 | print $handle $self->buffer_txt_section_xml($doc_obj, $section, $is_recursive);
|
---|
[13051] | 224 | }
|
---|
| 225 |
|
---|
| 226 | sub buffer_txt_section_xml {
|
---|
| 227 | my $self = shift(@_);
|
---|
[14927] | 228 | my ($doc_obj, $section, $is_recursive) = @_;
|
---|
[13051] | 229 |
|
---|
| 230 | my $section_ptr = $doc_obj->_lookup_section ($section);
|
---|
| 231 |
|
---|
| 232 | return "" unless defined $section_ptr;
|
---|
| 233 |
|
---|
| 234 | my $all_text = "<Section>\n";
|
---|
[13172] | 235 | $all_text .= &docprint::escape_text("$section_ptr->{'text'}");
|
---|
[13051] | 236 |
|
---|
[14927] | 237 | if (defined $is_recursive && $is_recursive)
|
---|
| 238 | {
|
---|
| 239 | # Output all the subsections
|
---|
| 240 | foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
|
---|
| 241 | $all_text .= $self->buffer_txt_section_xml($doc_obj, "$section.$subsection");
|
---|
| 242 | }
|
---|
| 243 | }
|
---|
[13051] | 244 |
|
---|
| 245 | $all_text .= "</Section>\n";
|
---|
| 246 |
|
---|
| 247 |
|
---|
| 248 | $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
|
---|
| 249 | return $all_text;
|
---|
| 250 | }
|
---|
| 251 |
|
---|
[14927] | 252 | #
|
---|
| 253 | # Print out docmets.xml file
|
---|
| 254 | #
|
---|
| 255 | sub output_mets_section
|
---|
| 256 | {
|
---|
[13051] | 257 | my $self = shift(@_);
|
---|
[14927] | 258 | my ($handle, $doc_obj, $section, $working_dir) = @_;
|
---|
[13051] | 259 |
|
---|
[14927] | 260 | gsprintf(STDERR, "METSPlugout::output_mets_section {common.must_be_implemented}\n") && die "\n";
|
---|
[13051] | 261 |
|
---|
[14927] | 262 | }
|
---|
[13051] | 263 |
|
---|
| 264 |
|
---|
[14927] | 265 | sub buffer_mets_dmdSection_section_xml
|
---|
| 266 | {
|
---|
[13051] | 267 | my $self = shift(@_);
|
---|
[14927] | 268 | my ($doc_obj,$section) = @_;
|
---|
[13051] | 269 |
|
---|
[14927] | 270 | gsprintf(STDERR, "METSPlugout::buffer_mets_dmdSection_section_xml {common.must_be_implemented}\n") && die "\n";
|
---|
[13051] | 271 | }
|
---|
| 272 |
|
---|
[14927] | 273 | sub buffer_mets_StructMapSection_section_xml
|
---|
| 274 | {
|
---|
[13051] | 275 | my $self = shift(@_);
|
---|
[14927] | 276 | my ($doc_obj,$section, $order_numref, $fileid_base) = @_;
|
---|
[13051] | 277 |
|
---|
| 278 | $section="" unless defined $section;
|
---|
| 279 |
|
---|
| 280 |
|
---|
| 281 | my $section_ptr=$doc_obj->_lookup_section($section);
|
---|
| 282 | return "" unless defined $section_ptr;
|
---|
| 283 |
|
---|
[14927] | 284 | $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
|
---|
[13051] | 285 |
|
---|
| 286 | # output fileSection by Sections
|
---|
| 287 | my $section_num ="1". $section;
|
---|
| 288 | my $dmd_num = $section_num;
|
---|
| 289 |
|
---|
| 290 | #**output the StructMap details
|
---|
| 291 |
|
---|
| 292 | my $dmdid_attr = "DM$dmd_num";
|
---|
| 293 |
|
---|
| 294 | my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
|
---|
| 295 | $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
|
---|
| 296 | $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
|
---|
| 297 |
|
---|
[14927] | 298 | $all_text .= ' <mets:fptr FILEID="'.$fileid_base.$section_num.'" />'. "\n";
|
---|
[13051] | 299 |
|
---|
| 300 |
|
---|
| 301 | foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
|
---|
[14927] | 302 | $all_text .= $self->buffer_mets_StructMapSection_section_xml($doc_obj,"$section.$subsection", $order_numref, $fileid_base);
|
---|
[13051] | 303 | }
|
---|
| 304 |
|
---|
| 305 | $all_text .= " </mets:div>\n";
|
---|
| 306 |
|
---|
| 307 | $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
|
---|
| 308 |
|
---|
| 309 | return $all_text;
|
---|
| 310 | }
|
---|
| 311 |
|
---|
| 312 |
|
---|
[14927] | 313 | sub buffer_mets_StructMapWhole_section_xml
|
---|
| 314 | {
|
---|
[13051] | 315 | my $self = shift(@_);
|
---|
| 316 | my ($doc_obj,$section) = @_;
|
---|
| 317 |
|
---|
| 318 | my $section_ptr = $doc_obj->_lookup_section($section);
|
---|
| 319 | return "" unless defined $section_ptr;
|
---|
| 320 |
|
---|
| 321 | my $all_text="";
|
---|
| 322 | my $fileID=0;
|
---|
| 323 | my $order_num = 0;
|
---|
| 324 |
|
---|
| 325 | $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
|
---|
| 326 |
|
---|
| 327 | #** output the StructMapSection for the whole section
|
---|
| 328 | # get the sourcefile and associative file
|
---|
| 329 |
|
---|
| 330 | foreach my $data (@{$section_ptr->{'metadata'}}){
|
---|
[13172] | 331 | my $escaped_value = &docprint::escape_text($data->[1]);
|
---|
[13051] | 332 |
|
---|
| 333 | if ($data->[0] eq "gsdlsourcefilename") {
|
---|
| 334 | ++$fileID;
|
---|
| 335 | $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
|
---|
| 336 | }
|
---|
| 337 |
|
---|
| 338 | if ($data->[0] eq "gsdlassocfile"){
|
---|
| 339 | ++$fileID;
|
---|
| 340 | $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
|
---|
| 341 | }
|
---|
| 342 | }
|
---|
| 343 | $all_text .= " </mets:div>\n";
|
---|
| 344 |
|
---|
| 345 | $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
|
---|
| 346 |
|
---|
| 347 | return $all_text;
|
---|
| 348 | }
|
---|
| 349 |
|
---|
[14927] | 350 |
|
---|
| 351 |
|
---|
| 352 | sub doctxt_to_xlink
|
---|
| 353 | {
|
---|
| 354 | my $self = shift @_;
|
---|
| 355 | my ($fname,$working_dir) = @_;
|
---|
| 356 |
|
---|
| 357 | gsprintf(STDERR, "METSPlugout::doxtxt_to_xlink {common.must_be_implemented}\n") && die "\n";
|
---|
| 358 | }
|
---|
| 359 |
|
---|
| 360 | sub buffer_mets_fileSection_section_xml
|
---|
| 361 | {
|
---|
[13051] | 362 | my $self = shift(@_);
|
---|
[14927] | 363 | my ($doc_obj,$section,$working_dir, $is_txt_split,$opt_attr,$fileid_base) = @_;
|
---|
[13051] | 364 |
|
---|
| 365 | #$section="" unless defined $section;
|
---|
| 366 |
|
---|
| 367 | my $section_ptr=$doc_obj->_lookup_section($section);
|
---|
| 368 | return "" unless defined $section_ptr;
|
---|
| 369 |
|
---|
[14927] | 370 | $fileid_base = "FILEGROUP_PRELUDE" unless defined $fileid_base;
|
---|
[13051] | 371 |
|
---|
| 372 | # output fileSection by sections
|
---|
| 373 | my $section_num ="1". $section;
|
---|
[14927] | 374 |
|
---|
| 375 | $opt_attr = "" unless defined $opt_attr;
|
---|
| 376 |
|
---|
| 377 | # output the fileSection details
|
---|
| 378 | my $all_text = ' <mets:fileGrp ID="'.$fileid_base.$section_num . '">'. "\n";
|
---|
| 379 | $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_attr >\n";
|
---|
[13051] | 380 |
|
---|
[14927] | 381 | my $xlink;
|
---|
| 382 | if (defined $is_txt_split && $is_txt_split)
|
---|
| 383 | {
|
---|
| 384 | my $section_fnum ="1". $section;
|
---|
| 385 | $section_fnum =~ s/\./_/g;
|
---|
| 386 |
|
---|
| 387 | $xlink = $self->doctxt_to_xlink("doctxt$section_fnum.xml",$working_dir);
|
---|
[13051] | 388 | }
|
---|
[14927] | 389 | else
|
---|
| 390 | {
|
---|
| 391 | $xlink = $self->doctxt_to_xlink("doctxt.xml",$working_dir);
|
---|
[13051] | 392 |
|
---|
[14927] | 393 | $xlink .= '#xpointer(/Section[';
|
---|
[13051] | 394 |
|
---|
[14927] | 395 | my $xpath = "1".$section;
|
---|
| 396 | $xpath =~ s/\./\]\/Section\[/g;
|
---|
| 397 |
|
---|
| 398 | $xlink .= $xpath;
|
---|
| 399 |
|
---|
| 400 | $xlink .= ']/text())';
|
---|
| 401 | }
|
---|
[13051] | 402 |
|
---|
[14927] | 403 |
|
---|
| 404 |
|
---|
| 405 | $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="'.$xlink.'"';
|
---|
| 406 |
|
---|
| 407 | $all_text .= ' xlink:title="Hierarchical Document Structure"/>' . "\n";
|
---|
[13051] | 408 | $all_text .= " </mets:file>\n";
|
---|
| 409 | $all_text .= " </mets:fileGrp>\n";
|
---|
| 410 |
|
---|
| 411 |
|
---|
| 412 | foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
|
---|
[14927] | 413 | $all_text .= $self->buffer_mets_fileSection_section_xml($doc_obj,"$section.$subsection",$working_dir, $is_txt_split, $opt_attr, $fileid_base);
|
---|
[13051] | 414 | }
|
---|
| 415 |
|
---|
| 416 | $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
|
---|
| 417 |
|
---|
| 418 | return $all_text;
|
---|
| 419 | }
|
---|
| 420 |
|
---|
[14927] | 421 | sub buffer_mets_fileWhole_section_xml
|
---|
| 422 | {
|
---|
[13051] | 423 | my $self = shift(@_);
|
---|
[14927] | 424 | my ($doc_obj,$section,$working_dir) = @_;
|
---|
[13051] | 425 |
|
---|
[14927] | 426 | gsprintf(STDERR, "METSPlugout::buffer_mets_fileWhole_section_xml {common.must_be_implemented}\n") && die "\n";
|
---|
[13051] | 427 |
|
---|
| 428 | }
|
---|
| 429 |
|
---|
[12330] | 430 | 1;
|
---|