Changeset 9954
- Timestamp:
- 2005-05-25T17:11:20+12:00 (19 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/docsave.pm
r9921 r9954 49 49 my $self = new docproc (); 50 50 51 $groupsize=1 unless defined $groupsize; 52 $service="import" unless defined $service; 51 my $collectdir = $ENV{'GSDLCOLLECTDIR'}; 52 53 $outhandle = 'STDERR' unless (defined $outhandle); 54 $service = "import" unless (defined $service); 55 $saveas = "GA" unless (defined $saveas); 56 $groupsize = 1 unless (defined $groupsize); 53 57 54 58 $self->{'collection'} = $collection; 55 if ( $service eq "import"){59 if (($service eq "import") || ($service eq "unbuild")) { 56 60 $self->{'archive_info'} = $info; 61 # set a default for the archive directory 62 $self->{'archive_dir'} = &util::filename_cat ($collectdir, "archives"); 57 63 } elsif ($service eq "export"){ 58 64 $self->{'export_info'} = $info; 65 # set a default for the export directory 66 $self->{'export_dir'} = &util::filename_cat($collectdir, "export"); 59 67 } else { 68 print $outhandle "docsave::new Unrecongised service: $service\n"; 60 69 return; 61 70 } … … 65 74 $self->{'keepimportstructure'} = 0; 66 75 $self->{'groupsize'} = $groupsize; 67 $self->{'gs_count'} = 0; 68 69 $self->{'outhandle'} = 'STDERR'; 70 $self->{'outhandle'} = $outhandle if defined $outhandle; 71 $self->{'service'} = $service; 72 $self->{'saveas'} = $saveas; 73 74 # set a default for the archive directory 75 if ($service eq "import"){ 76 $self->{'archive_dir'} = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives"); 77 } elsif ($service eq "export") { 78 # set a default for the export directory 79 $self->{'export_dir'} = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "export"); 80 } else { 81 return; 82 } 76 $self->{'gs_count'} = 0; 77 78 $self->{'outhandle'} = $outhandle; 79 $self->{'service'} = $service; 80 $self->{'saveas'} = $saveas; 81 83 82 $self->{'sortmeta'} = undef; 84 83 … … 102 101 } 103 102 103 sub getoutputdir { 104 my $self = shift (@_); 105 106 my $output_dir = undef; 107 108 my $service = $self->{'service'}; 109 110 if (($service eq "import") || ($service eq "unbuild")) { 111 $output_dir = $self->{'archive_dir'}; 112 } 113 elsif ($service eq "export") { 114 $output_dir = $self->{'export_dir'}; 115 } 116 else { 117 my $outhandle = $self->{'outhandle'}; 118 119 print $outhandle "docsave::getoutputdir did not recognise service "; 120 print $outhandle " '$service'. No output directory set.\n"; 121 } 122 123 return $output_dir; 124 } 125 126 127 sub getoutputinfo { 128 my $self = shift (@_); 129 130 my $output_info = undef; 131 132 my $service = $self->{'service'}; 133 134 if (($service eq "import") || ($service eq "unbuild")) { 135 $output_info = $self->{'archive_info'}; 136 } 137 elsif ($service eq "export") { 138 $output_info = $self->{'export_info'}; 139 } 140 else { 141 my $outhandle = $self->{'outhandle'}; 142 143 print $outhandle "docsave::getoutputinfo did not recognise service "; 144 print $outhandle " '$service'. No output information available.\n"; 145 } 146 147 return $output_info; 148 } 149 150 104 151 sub set_sortmeta { 105 152 my $self = shift (@_); … … 122 169 123 170 my $outhandle = $self->{'outhandle'}; 124 my $service = $self->{'service'} || "import";171 my $service = $self->{'service'}; 125 172 126 173 # Define the SaveAs Type 127 my $save_as = $self->{'saveas'} || "GA";174 my $save_as = $self->{'saveas'}; 128 175 my $collection = $self->{'collection'}; 129 176 … … 136 183 $OID = "NULL" unless defined $OID; 137 184 185 my $top_section = $doc_obj->get_top_section(); 186 138 187 # get document's directory 139 188 my $doc_dir = $self->get_doc_dir ($OID, $doc_obj->get_source_filename()); … … 141 190 # groupsize is 1 (i.e. one document per XML file) so sortmeta 142 191 # may be used 143 144 if ($service eq "import") { 145 my $archive_info = $self->{'archive_info'}; 146 } elsif ($service eq "export") { 147 my $export_info = $self->{'export_info'}; 148 } else { 149 return; 150 } 151 192 193 my $output_info = $self->getoutputinfo(); 194 return if (!defined $output_info); 195 196 my $output_dir = $self->getoutputdir(); 197 my $working_dir = &util::filename_cat ($output_dir, $doc_dir); 198 152 199 # copy all the associated files, add this information as metadata 153 200 # to the document 154 if ($service eq "export" && $save_as eq "DSpace") { 155 # create handle file based on doc_dir 156 157 my $doc_handle_file 158 = &util::filename_cat ($self->{'export_dir'},$doc_dir, "handle"); 159 160 if (!open(OUTDOC_EXPORT_HANDLE,">$doc_handle_file")){ 201 if ($save_as eq "DSpace") { 202 203 # Genereate handle file 204 # (Note: this section of code would benefit from being restructured) 205 my $doc_handle_file = &util::filename_cat ($working_dir, "handle"); 206 207 my $env_hp = $ENV{'DSPACE_HANDLE_PREFIX'}; 208 my $handle_prefix = (defined $env_hp) ? $env_hp : "123456789"; 209 210 if (!open(OUTDOC_HANDLE,">$doc_handle_file")){ 161 211 print $outhandle "docsave::process could not write collection handle to file $doc_handle_file\n"; 162 212 return; … … 164 214 165 215 my ($handle) = ($doc_dir =~ m/^(.*)\.dir$/); 166 print OUTDOC_EXPORT_HANDLE "123456789/$handle\n"; 167 168 close(OUTDOC_EXPORT_HANDLE); 169 170 # open contents file 171 my $doc_contents_file 172 = &util::filename_cat ($self->{'export_dir'},$doc_dir, "contents"); 173 174 if (!open(OUTDOC_EXPORT_CONTENTS,">$doc_contents_file")){ 216 print OUTDOC_HANDLE "$handle_prefix/$handle\n"; 217 218 close OUTDOC_HANDLE; 219 220 # Generate contents file 221 my $doc_contents_file = &util::filename_cat ($working_dir, "contents"); 222 223 if (!open(OUTDOC_CONTENTS,">$doc_contents_file")){ 175 224 print $outhandle "docsave::process could not write collection contents to file $doc_contents_file\n"; 176 225 return; 177 226 } 178 $self->process_assoc_files ($doc_obj, $doc_dir, 'docsave::OUTDOC_EXPORT_CONTENTS'); 227 $self->process_assoc_files ($doc_obj, $doc_dir, 'docsave::OUTDOC_CONTENTS'); 228 229 close OUTDOC_CONTENTS; 230 179 231 } else { 180 232 $self->process_assoc_files ($doc_obj, $doc_dir, ''); 181 233 } 182 234 183 my $doc_file; 184 my $doc_mets_file; 185 my $doc_txt_file; 235 # Save the document in the requested 'save_as' format 236 237 if ($save_as eq "GA") { 238 239 my $doc_file = &util::filename_cat ($working_dir, "doc.xml"); 240 241 if (!open (OUTDOC, ">$doc_file")) { 242 print $outhandle "docsave::process could not write to file $doc_file\n"; 243 return; 244 } 245 246 # save this document 247 $self->output_xml_header('docsave::OUTDOC'); 248 $doc_obj->output_section('docsave::OUTDOC',$top_section); 249 $self->output_xml_footer('docsave::OUTDOC'); 250 251 close OUTDOC; 252 } 253 elsif ($save_as eq "METS") { 254 255 my $doc_txt_file = &util::filename_cat ($working_dir,"doctxt.xml"); 256 257 if (!open(OUTDOC_TXT, ">$doc_txt_file")){ 258 print $outhandle "docsave::process could not write to file $doc_txt_file\n"; 259 return; 260 } 261 262 $self->output_txt_xml_header('docsave::OUTDOC_TXT'); 263 $doc_obj->output_txt_section('docsave::OUTDOC_TXT', $top_section); 264 $self->output_txt_xml_footer('docsave::OUTDOC_TXT'); 265 266 close OUTDOC_TXT; 267 268 # Now save the document with metadata and text structure to docmets.xml 269 270 my $doc_mets_file = &util::filename_cat ($working_dir, "docmets.xml"); 271 272 my $doc_title = $doc_obj->get_metadata_element($top_section,"dc.Title"); 273 if (!defined $doc_title) { 274 $doc_title = $doc_obj->get_metadata_element($top_section,"Title"); 275 } 276 277 if (!open(OUTDOC_METS,">$doc_mets_file")){ 278 print $outhandle "docsave::process could not write to file $doc_mets_file\n"; 279 return; 280 } 281 282 my $saveas_version = $self->{'saveas_version'}; 283 $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID, $doc_title); 284 $doc_obj->output_mets_section('docsave::OUTDOC_METS',$top_section,$saveas_version,$working_dir); 285 $self->output_mets_xml_footer('docsave::OUTDOC_METS'); 286 287 close OUTDOC_METS; 288 } 289 elsif ($save_as eq "DSpace") { 290 291 # Generate dublin_core.xml file 292 my $doc_dc_file = &util::filename_cat ($working_dir, "dublin_core.xml"); 293 294 if (!open(OUTDOC_DC,">$doc_dc_file")){ 295 print $outhandle "docsave::process could not write dublin core to file $doc_dc_file\n"; 296 return; 297 } 298 299 my $saveas_version = $self->{'saveas_version'}; 300 301 $self->output_dc_xml_header('docsave::OUTDOC_DC', $OID); 302 $doc_obj->output_dc_section('docsave::OUTDOC_DC',$top_section); 303 $self->output_dc_xml_footer('docsave::OUTDOC_DC'); 304 305 close OUTDOC_DC; 306 } else { # save_as isn't one of the recognised types 307 print $outhandle "docsave::process unrecognised saveas type, $save_as\n"; 308 return; 309 } 310 311 186 312 my $short_doc_file; 187 313 188 # Save collection as either Greenstone Archive or METS format 189 if ($service eq "import") { 190 my $doc_file 191 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml"); 192 193 # define doctxt.xml file 194 my $doc_txt_file 195 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir,"doctxt.xml"); 196 197 my $import_working_dir 198 =&util::filename_cat ($self->{'archive_dir'}, $doc_dir); 199 200 # define docmets.xml file 201 my $doc_mets_file 202 = &util::filename_cat ($self->{'archive_dir'},$doc_dir, "docmets.xml"); 203 204 if ($save_as eq "GA") { 205 $short_doc_file = util::filename_cat ($doc_dir, "doc.xml"); 206 } elsif ($save_as eq "METS") { 207 #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml"); 208 $short_doc_file = &util::filename_cat ($doc_dir, "docmets.xml"); 209 } else { 210 return; 211 } 212 213 if ($save_as eq "GA") { 214 if (!open (OUTDOC, ">$doc_file")) { 215 print $outhandle "docsave::process could not write to file $doc_file\n"; 216 return; 217 } 218 # save this document 219 $self->output_xml_header('docsave::OUTDOC'); 220 $doc_obj->output_section('docsave::OUTDOC', 221 $doc_obj->get_top_section()); 222 $self->output_xml_footer('docsave::OUTDOC'); 223 224 close OUTDOC; 225 } elsif ($save_as eq "METS") { 226 # save the document without metadata:doctxt.xml 227 228 if (!open(OUTDOC_TXT, ">$doc_txt_file")){ 229 print $outhandle "docsave::process could not write to file $doc_txt_file\n"; 230 return; 231 } 232 233 $self->output_txt_xml_header('docsave::OUTDOC_TXT'); 234 $doc_obj->output_txt_section('docsave::OUTDOC_TXT', $doc_obj->get_top_section()); 235 #$self->output_txt_xml_footer('docsave::OUTDOC_TXT'); 236 237 # Convert doctxt.xml file to docmets.xml 238 if (!open(OUTDOC_METS,">$doc_mets_file")){ 239 print $outhandle "docsave::process could not write to file $doc_mets_file\n"; 240 return; 241 } 242 243 $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID); 244 $doc_obj->output_mets_section('docsave::OUTDOC_METS', 245 $doc_obj->get_top_section()); 246 $self->output_mets_xml_footer('docsave::OUTDOC_METS'); 247 248 close OUTDOC_TXT; 249 close OUTDOC_METS; 250 } else { # save_as isn't GA or METS 251 print $outhandle "docsave::process unrecognised saveas type, $save_as\n"; 252 return; 253 } 254 } 255 256 ## Export the collection to METs format or DSpace Archive Format into the export directory 257 if ($service eq "export") { 258 my $doc_dc_file; 259 my $doc_contents_file; 260 261 my $export_working_dir 262 =&util::filename_cat ($self->{'export_dir'}, $doc_dir); 263 264 if ($save_as eq "METS") { 265 $doc_mets_file 266 = &util::filename_cat ($self->{'export_dir'},$doc_dir, "docmets.xml"); 267 268 $doc_txt_file 269 = &util::filename_cat ($self->{'export_dir'},$doc_dir, "doctxt.xml"); 270 271 if (!open(OUTDOC_EXPORT_TXT, ">$doc_txt_file")){ 272 print $outhandle "docsave::process could not write TXT to file $doc_txt_file\n"; 273 return; 274 } 275 276 $self->output_txt_xml_header('docsave::OUTDOC_EXPORT_TXT'); 277 $doc_obj->output_txt_section('docsave::OUTDOC_EXPORT_TXT', $doc_obj->get_top_section()); 278 279 if (!open(OUTDOC_EXPORT_METS,">$doc_mets_file")){ 280 print $outhandle "docsave::process could not write METS format to file $doc_mets_file\n"; 281 return; 282 } 283 $self->output_mets_xml_header('docsave::OUTDOC_EXPORT_METS', $OID); 284 $doc_obj->output_mets_section('docsave::OUTDOC_EXPORT_METS',$doc_obj->get_top_section(), $export_working_dir); 285 $self->output_mets_xml_footer('docsave::OUTDOC_EXPORT_METS'); 286 287 close OUTDOC_EXPORT_TXT; 288 close OUTDOC_EXPORT_METS; 289 } elsif ($save_as eq "DSpace") { 290 291 # Generate dublin_core.xml file 292 $doc_dc_file 293 = &util::filename_cat ($self->{'export_dir'},$doc_dir, "dublin_core.xml"); 294 295 if (!open(OUTDOC_EXPORT_DC,">$doc_dc_file")){ 296 print $outhandle "docsave::process could not write dublin core to file $doc_dc_file\n"; 297 return; 298 } 299 300 $self->output_dc_xml_header('docsave::OUTDOC_EXPORT_DC', $OID); 301 $doc_obj->output_dc_section('docsave::OUTDOC_EXPORT_DC',$doc_obj->get_top_section(), $export_working_dir); 302 $self->output_dc_xml_footer('docsave::OUTDOC_EXPORT_DC'); 303 304 close OUTDOC_EXPORT_DC; 305 close OUTDOC_EXPORT_CONTENTS; 306 } else { # save_as isn't METS or DSpace 307 print $outhandle "docsave::process unrecognised saveas type, $save_as\n"; 308 return; 309 } 310 311 if ($save_as eq "METS") { 312 $short_doc_file = util::filename_cat ($doc_dir, "docmets.xml"); 313 } elsif ($save_as eq "DSpace") { 314 #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml"); 315 $short_doc_file=&util::filename_cat ($doc_dir, "dublin_core.xml"); 316 } else { 317 return; 318 } 319 320 } 314 if ($save_as eq "GA") { 315 $short_doc_file = util::filename_cat ($doc_dir, "doc.xml"); 316 } elsif ($save_as eq "METS") { 317 $short_doc_file = &util::filename_cat ($doc_dir, "docmets.xml"); 318 } elsif ($save_as eq "DSpace") { 319 $short_doc_file=&util::filename_cat ($doc_dir, "dublin_core.xml"); 320 } else { 321 return; 322 } 323 321 324 #save for later (for close_file_output()) 322 325 $self->{'short_doc_file'} = $short_doc_file; … … 336 339 my ($metadata); 337 340 if (defined ($self->{'sortmeta'})) { 338 $metadata = $doc_obj->get_metadata_element($doc_obj->get_top_section(), 339 $self->{'sortmeta'}); 341 $metadata = $doc_obj->get_metadata_element($top_section,$self->{'sortmeta'}); 340 342 } 341 343 if (defined ($metadata) && $metadata) { … … 349 351 $metadata = &sorttools::format_metadata_for_sorting($self->{'sortmeta'}, $metadata, $doc_obj); 350 352 } 351 # store reference in the archive_info and export_info 352 if ($service eq "export") { 353 $self->{'export_info'}->add_info($OID, $short_doc_file, $metadata); 354 } elsif ($service eq "import") { 355 $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata); 356 } 353 354 # store reference in the relevant info object (archive_info,export_info,...) 355 $output_info->add_info($OID, $short_doc_file, $metadata); 357 356 } 358 357 … … 414 413 my $self = shift (@_); 415 414 my ($OID, $source_filename) = @_; 416 my $doc_info; 415 416 my $service = $self-> {'service'}; 417 418 my $working_dir = $self->getoutputdir(); 419 my $working_info = $self->getoutputinfo(); 420 return if (!defined $working_info); 421 422 my $doc_info = $working_info->get_info($OID); 417 423 my $doc_dir = ''; 418 my $service = $self-> {'service'}; 419 my $working_dir; 420 my $working_info; 421 422 if ($service eq "import") { 423 $doc_info = $self->{'archive_info'}->get_info($OID); 424 $working_dir = $self->{'archive_dir'}; 425 $working_info = $self->{'archive_info'}; 426 } elsif ($service eq "export") { 427 $doc_info =$self->{'export_info'}->get_info($OID); 428 $working_dir = $self->{'export_dir'}; 429 $working_info = $self->{'export_info'}; 430 } else { 431 return; 432 } 424 433 425 if (defined $doc_info && scalar(@$doc_info) >= 1) { 434 426 # this OID already has an assigned directory, use the … … 449 441 # have to get a new document directory 450 442 451 if ( $service eq "import") {443 if (($service eq "import") || ($service eq "unbuild")) { 452 444 my $doc_dir_rest = $OID; 453 445 my $doc_dir_num = 0; … … 465 457 else { 466 458 # Export formats such as DSpace need the directory structure to 467 # be flat. This is simple to arrange (set 'doc_dir' to b itthe459 # be flat. This is simple to arrange (set 'doc_dir' to be the 468 460 # documents OID) but breaks Windows 3.1 file system compliance. 469 # Such a loss is not a bi tthing in this situation as such461 # Such a loss is not a big thing in this situation as such 470 462 # systems don't run on Windows 3.1 anyway. 471 463 … … 485 477 486 478 my $outhandle = $self->{'outhandle'}; 487 479 my $service = $self->{'service'}; 480 my $save_as = $self->{'saveas'}; 481 482 my $output_dir = $self->getoutputdir(); 483 return if (!defined $output_dir); 484 485 my $working_dir = &util::filename_cat($output_dir, $doc_dir); 486 488 487 my @assoc_files = (); 489 488 my $filename;; 490 my $working_dir;491 my $service = $self->{'service'};492 my $save_as = $self->{'saveas'};493 494 if ($service eq "import") {495 $working_dir = $self->{'archive_dir'};496 } elsif ($service eq "export"){497 $working_dir = $self->{'export_dir'};498 } else {499 return;500 }501 489 502 490 my $source_filename = $doc_obj->get_source_filename(); … … 523 511 print $handle "$tail_filename\n"; 524 512 525 $filename = &util::filename_cat($working_dir, $ doc_dir, $tail_filename);513 $filename = &util::filename_cat($working_dir, $tail_filename); 526 514 &util::hard_link ($source_filename, $filename); 527 515 } … … 552 540 } 553 541 554 $filename = &util::filename_cat($working_dir, $ doc_dir, $afile);542 $filename = &util::filename_cat($working_dir, $afile); 555 543 556 544 … … 605 593 } 606 594 607 # store reference in the archive_info and export_infor 608 if ($service eq "import") { 609 $self->{'archive_info'}->add_info($OID, $short_doc_file); 610 } elsif ($service eq "export") { 611 $self->{'export_info'}->add_info($OID, $short_doc_file); 612 } else { 613 return; 614 } 595 # store reference in relevant info object (archive_info,export_info,...) 596 my $output_info = $self->getoutputinfo(); 597 return 0 if (!defined $output_info); 598 $output_info->add_info($OID, $short_doc_file); 599 615 600 return 1; 616 601 } … … 643 628 my $self = shift(@_); 644 629 my ($handle) = @_; 645 print $handle "<the end of the file>\n";630 # Nothing needs to be output at present 646 631 } 647 632 648 633 sub output_mets_xml_header(){ 649 634 my $self = shift(@_); 650 my ($handle, $OID) = @_; 635 my ($handle, $OID, $doc_title) = @_; 636 637 my $version = $self->{'saveas_version'}; 638 639 my $extra_attr = ""; 640 if ($version eq "fedora") { 641 my $fnamespace = $ENV{'FEDORA_PID_NAMESPACE'}; 642 my $oid_namespace = (defined $fnamespace) ? $fnamespace : "test"; 643 644 $extra_attr = "OBJID=\"$oid_namespace:$OID\" TYPE=\"FedoraObject\" LABEL=\"$doc_title\""; 645 } 646 else { 647 # Greenstone METS profile 648 $extra_attr = "OBJID=\"$OID:2\""; 649 } 650 651 651 652 652 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n"; … … 659 659 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/' . "\n"; 660 660 print $handle ' http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd"' . "\n"; 661 print $handle ' OBJID="'. $OID. ':2">' . "\n"; 661 print $handle " $extra_attr>\n"; 662 663 if ($version eq "fedora") { 664 print $handle '<mets:metsHdr RECORDSTATUS="A"/>'. "\n"; # A = active 665 } 666 662 667 } 663 668
Note:
See TracChangeset
for help on using the changeset viewer.