Changeset 8517
- Timestamp: 2004-11-11T14:31:46+13:00 (19 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/docsave.pm
r8094 r8517 35 35 36 36 use arcinfo; 37 use expinfo; 37 38 use docproc; 38 39 use util; … … 44 45 45 46 sub new { 46 my ($class, $collection, $ archive_info, $verbosity,47 $gzip, $groupsize, $outhandle ) = @_;47 my ($class, $collection, $info, $verbosity, 48 $gzip, $groupsize, $outhandle, $service, $saveas) = @_; 48 49 my $self = new docproc (); 49 50 50 51 51 $groupsize=1 unless defined $groupsize; 52 52 $self->{'collection'} = $collection; 53 $self->{'archive_info'} = $archive_info; 53 if ($service eq "import"){ 54 $self->{'archive_info'} = $info; 55 } elsif ($service eq "export"){ 56 $self->{'export_info'} = $info; 57 } else { 58 return; 59 } 60 54 61 $self->{'verbosity'} = $verbosity; 55 62 $self->{'gzip'} = $gzip; … … 60 67 $self->{'outhandle'} = 'STDERR'; 61 68 $self->{'outhandle'} = $outhandle if defined $outhandle; 69 $self->{'service'} = $service; 70 $self->{'saveas'} = $saveas; 71 62 72 # set a default for the archive directory 63 $self->{'archive_dir'} = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives"); 64 73 if ($service eq "import"){ 74 $self->{'archive_dir'} = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives"); 75 } elsif ($service eq "export") { 76 # set a default for the export directory 77 $self->{'export_dir'} = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "export"); 78 } else { 79 return; 80 } 65 81 $self->{'sortmeta'} = undef; 66 82 … … 76 92 } 77 93 94 sub setexportdir { 95 my $self = shift (@_); 96 my ($export_dir) = @_; 97 98 &util::mk_all_dir ($export_dir) unless -e $export_dir; 99 $self->{'export_dir'} = $export_dir; 100 } 101 78 102 sub set_sortmeta { 79 103 my $self = shift (@_); … … 81 105 82 106 $self->{'sortmeta'} = $sortmeta; 83 }107 } 84 108 85 109 sub process { … … 88 112 89 113 my $outhandle = $self->{'outhandle'}; 90 114 my $service = $self->{'service'} || "import"; 115 116 # Define the SaveAs Type 117 my $save_as = $self->{'saveas'} || "GA"; 118 my $collection = $self->{'collection'}; 119 91 120 if 
($self->{'groupsize'} > 1) { 92 121 $self->group_process ($doc_obj); … … 94 123 } 95 124 125 my $OID = $doc_obj->get_OID(); 126 $OID = "NULL" unless defined $OID; 127 128 # get document's directory 129 my $doc_dir = $self->get_doc_dir ($OID); 130 96 131 # groupsize is 1 (i.e. one document per XML file) so sortmeta 97 132 # may be used 98 99 my $OID = $doc_obj->get_OID(); 100 $OID = "NULL" unless defined $OID; 101 102 # get document's directory 103 my $doc_dir = $self->get_doc_dir ($OID); 104 133 134 if ($service eq "import") { 135 my $archive_info = $self->{'archive_info'}; 136 } elsif ($service eq "export") { 137 my $export_info = $self->{'export_info'}; 138 } else { 139 return; 140 } 105 141 106 142 # copy all the associated files, add this information as metadata 107 143 # to the document 108 $self->process_assoc_files ($doc_obj, $doc_dir); 144 if ($service eq "export" && $save_as eq "DSpace") { 145 # open contents file 146 my $doc_contents_file 147 = &util::filename_cat ($self->{'export_dir'},$doc_dir, "contents"); 109 148 110 my $doc_file 111 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml");112 113 #***define doctxt.xml file 114 my $doc_txt_file 115 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir,"doctxt.xml"); 116 my $working_dir 117 =&util::filename_cat ($self->{'archive_dir'}, $doc_dir); 149 if (!open(OUTDOC_EXPORT_CONTENTS,">$doc_contents_file")){ 150 print $outhandle "docsave::process could not write collection contents to file $doc_contents_file\n"; 151 return; 152 } 153 $self->process_assoc_files ($doc_obj, $doc_dir, 'docsave::OUTDOC_EXPORT_CONTENTS'); 154 } else { 155 $self->process_assoc_files ($doc_obj, $doc_dir, ''); 156 } 118 157 119 #***define docmets.xmlfile 120 my $doc_mets_file 121 = &util::filename_cat ($self->{'archive_dir'},$doc_dir, "docmets.xml"); 158 my $doc_file; 159 my $doc_mets_file; 160 my $doc_txt_file; 161 my $short_doc_file; 162 163 #Import collection to GS2 in GS Archive format and METs format 164 if 
($service eq "import") { 165 my $doc_file 166 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml"); 167 168 #***define doctxt.xml file 169 my $doc_txt_file 170 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir,"doctxt.xml"); 171 172 my $import_working_dir 173 =&util::filename_cat ($self->{'archive_dir'}, $doc_dir); 174 175 #***define docmets.xml file 176 my $doc_mets_file 177 = &util::filename_cat ($self->{'archive_dir'},$doc_dir, "docmets.xml"); 178 179 if ($save_as eq "GA") { 180 $short_doc_file = util::filename_cat ($doc_dir, "doc.xml"); 181 } elsif ($save_as eq "METS") { 182 #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml"); 183 $short_doc_file = &util::filename_cat ($doc_dir, "docmets.xml"); 184 } else { 185 return; 186 } 187 188 if ($save_as eq "GA") { 189 if (!open (OUTDOC, ">$doc_file")) { 190 print $outhandle "docsave::process could not write to file $doc_file\n"; 191 return; 192 } 193 # save this document 194 $self->output_xml_header('docsave::OUTDOC'); 195 $doc_obj->output_section('docsave::OUTDOC', 196 $doc_obj->get_top_section()); 197 $self->output_xml_footer('docsave::OUTDOC'); 198 199 close OUTDOC; 200 } elsif ($save_as eq "METS") { 201 # save the document without metadata:doctxt.xml 202 203 if (!open(OUTDOC_TXT, ">$doc_txt_file")){ 204 print $outhandle "docsave::process could not write to file $doc_txt_file\n"; 205 return; 206 } 207 208 $self->output_txt_xml_header('docsave::OUTDOC_TXT'); 209 $doc_obj->output_txt_section('docsave::OUTDOC_TXT', $doc_obj->get_top_section()); 210 #$self->output_txt_xml_footer('docsave::OUTDOC_TXT'); 211 212 # Convert doctxt.xml file to docmets.xml 213 if (!open(OUTDOC_METS,">$doc_mets_file")){ 214 print $outhandle "docsave::process could not write to file $doc_mets_file\n"; 215 return; 216 } 217 218 $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID); 219 $doc_obj->output_mets_section('docsave::OUTDOC_METS', 220 $doc_obj->get_top_section(), 221 $import_working_dir); 222 
$self->output_mets_xml_footer('docsave::OUTDOC_METS'); 223 224 close OUTDOC_TXT; 225 close OUTDOC_METS; 226 } else { # save_as isn't GA or METS 227 print $outhandle "docsave::process unrecognised saveas type, $save_as\n"; 228 return; 229 } 230 } 231 232 ## Export the collection to METs format or DSpace Archive Format into the export directory 233 if ($service eq "export") { 234 my $doc_dc_file; 235 my $doc_contents_file; 236 237 my $export_working_dir 238 =&util::filename_cat ($self->{'export_dir'}, $doc_dir); 239 240 if ($save_as eq "METS") { 241 $doc_mets_file 242 = &util::filename_cat ($self->{'export_dir'},$doc_dir, "docmets.xml"); 243 244 $doc_txt_file 245 = &util::filename_cat ($self->{'export_dir'},$doc_dir, "doctxt.xml"); 246 247 if (!open(OUTDOC_EXPORT_TXT, ">$doc_txt_file")){ 248 print $outhandle "docsave::process could not write TXT to file $doc_txt_file\n"; 249 return; 250 } 251 252 $self->output_txt_xml_header('docsave::OUTDOC_EXPORT_TXT'); 253 $doc_obj->output_txt_section('docsave::OUTDOC_EXPORT_TXT', $doc_obj->get_top_section()); 254 255 if (!open(OUTDOC_EXPORT_METS,">$doc_mets_file")){ 256 print $outhandle "docsave::process could not write METS format to file $doc_mets_file\n"; 257 return; 258 } 259 $self->output_mets_xml_header('docsave::OUTDOC_EXPORT_METS', $OID); 260 $doc_obj->output_mets_section('docsave::OUTDOC_EXPORT_METS',$doc_obj->get_top_section(), $export_working_dir); 261 $self->output_mets_xml_footer('docsave::OUTDOC_EXPORT_METS'); 262 263 close OUTDOC_EXPORT_TXT; 264 close OUTDOC_EXPORT_METS; 265 } elsif ($save_as eq "DSpace") { 266 267 # Generate dublin_core.xml file 268 $doc_dc_file 269 = &util::filename_cat ($self->{'export_dir'},$doc_dir, "dublin_core.xml"); 270 271 if (!open(OUTDOC_EXPORT_DC,">$doc_dc_file")){ 272 print $outhandle "docsave::process could not write dublin core to file $doc_dc_file\n"; 273 return; 274 } 275 276 $self->output_dc_xml_header('docsave::OUTDOC_EXPORT_DC', $OID); 277 
$doc_obj->output_dc_section('docsave::OUTDOC_EXPORT_DC',$doc_obj->get_top_section(), $export_working_dir); 278 $self->output_dc_xml_footer('docsave::OUTDOC_EXPORT_DC'); 279 280 close OUTDOC_EXPORT_DC; 281 close OUTDOC_EXPORT_CONTENTS; 282 } else { # save_as isn't METS or DSpace 283 print $outhandle "docsave::process unrecognised saveas type, $save_as\n"; 284 return; 285 } 286 287 if ($save_as eq "METS") { 288 $short_doc_file = util::filename_cat ($doc_dir, "docmets.xml"); 289 } elsif ($save_as eq "DSpace") { 290 #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml"); 291 $short_doc_file=&util::filename_cat ($doc_dir, "dublin_core.xml"); 292 } else { 293 return; 294 } 295 296 } 297 #save for later (for close_file_output()) 298 $self->{'short_doc_file'} = $short_doc_file; 122 299 123 my $short_doc_file;124 my $save_as = $self->{'saveas'} || "GA";125 if ($save_as eq "GA") {126 $short_doc_file = util::filename_cat ($doc_dir, "doc.xml");127 } elsif ($save_as eq "METS") {128 #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml");129 $short_doc_file=&util::filename_cat ($doc_dir, "docmets.xml");130 } else {131 return;132 }133 # save for later (for close_file_output())134 $self->{'short_doc_file'}=$short_doc_file;135 136 if ($save_as eq "GA") {137 if (!open (OUTDOC, ">$doc_file")) {138 print $outhandle "docsave::process could not write to file $doc_file\n";139 return;140 }141 142 # save this document143 $self->output_xml_header('docsave::OUTDOC');144 $doc_obj->output_section('docsave::OUTDOC',145 $doc_obj->get_top_section());146 $self->output_xml_footer('docsave::OUTDOC');147 148 close OUTDOC;149 } elsif ($save_as eq "METS") {150 # save the document without metadata:doctxt.xml151 152 if (!open(OUTDOC_TXT, ">$doc_txt_file")){153 print $outhandle "docsave::process could not write to file $doc_mets_file\n";154 return;155 }156 157 $self->output_txt_xml_header('docsave::OUTDOC_TXT');158 $doc_obj->output_txt_section('docsave::OUTDOC_TXT', 
$doc_obj->get_top_section());159 #$self->output_txt_xml_footer('docsave::OUTDOC_TXT');160 161 # Convert doctxt.xml file to docmets.xml162 if (!open(OUTDOC_METS,">$doc_mets_file")){163 print $outhandle "docsave::process could not write to file $doc_mets_file\n";164 return;165 }166 167 $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID);168 $doc_obj->output_mets_section('docsave::OUTDOC_METS',169 $doc_obj->get_top_section(),170 $working_dir);171 $self->output_mets_xml_footer('docsave::OUTDOC_METS');172 173 close OUTDOC_TXT;174 close OUTDOC_METS;175 } else { # save_as isn't GA or METS176 print $outhandle "docsave::process unrecognised saveas type, $save_as\n";177 return;178 }179 180 300 if ($self->{'gzip'}) { 181 301 my $doc_file = $self->{'gs_filename'}; … … 190 310 191 311 # do the sortmeta thing 192 my ($metadata); if (defined ($self->{'sortmeta'})) { 312 my ($metadata); 313 if (defined ($self->{'sortmeta'})) { 193 314 $metadata = $doc_obj->get_metadata_element($doc_obj->get_top_section(), 194 315 $self->{'sortmeta'}); 195 316 } 196 317 197 # store reference in the archive_info 198 $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata); 318 # store reference in the archive_info and export_info 319 if ($service eq "export") { 320 $self->{'export_info'}->add_info($OID, $short_doc_file, $metadata); 321 } elsif ($service eq "import") { 322 $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata); 323 } 199 324 } 200 325 … … 253 378 } 254 379 255 256 380 sub get_doc_dir { 257 381 my $self = shift (@_); 258 382 my ($OID) = @_; 259 260 my $doc_info = $self->{'archive_info'}->get_info($OID); 261 my $doc_dir = ""; 383 my $doc_info; 384 my $doc_dir; 385 my $service = $self-> {'service'}; 386 my $working_dir; 387 my $working_info; 388 389 if ($service eq "import") { 390 $doc_info = $self->{'archive_info'}->get_info($OID); 391 $working_dir = $self->{'archive_dir'}; 392 $working_info = $self->{'archive_info'}; 393 } elsif ($service eq "export") { 
394 $doc_info =$self->{'export_info'}->get_info($OID); 395 $working_dir = $self->{'export_dir'}; 396 $working_info = $self->{'export_info'}; 397 } else { 398 return; 399 } 262 400 if (defined $doc_info && scalar(@$doc_info) >= 1) { 263 401 # this OID already has an assigned directory, use the … … 276 414 } 277 415 } while ($doc_dir_rest ne "" && 278 ((-d &util::filename_cat ($self->{'archive_dir'}, "$doc_dir.dir")) || 279 ($self->{'archive_info'}->size() >= 1024 && $doc_dir_num < 2))); 416 ((-d &util::filename_cat ($working_dir, "$doc_dir.dir")) || 417 ($working_info->size() >= 1024 && $doc_dir_num < 2))); 418 280 419 $doc_dir .= ".dir"; 281 282 } 283 284 &util::mk_all_dir (&util::filename_cat ($self->{'archive_dir'}, $doc_dir)); 285 420 &util::mk_all_dir (&util::filename_cat ($working_dir, $doc_dir)); 421 } 286 422 return $doc_dir; 287 423 } 288 424 289 290 425 sub process_assoc_files { 291 426 my $self = shift (@_); 292 my ($doc_obj, $doc_dir ) = @_;427 my ($doc_obj, $doc_dir, $handle) = @_; 293 428 294 429 my $outhandle = $self->{'outhandle'}; 295 430 296 431 my @assoc_files = (); 432 my $filename;; 433 my $working_dir; 434 my $service = $self->{'service'}; 435 my $save_as = $self->{'saveas'}; 436 437 if ($service eq "import") { 438 $working_dir = $self->{'archive_dir'}; 439 } elsif ($service eq "export"){ 440 $working_dir = $self->{'export_dir'}; 441 } else { 442 return; 443 } 444 $doc_obj->get_source_filename()=~ /\/[^\/\\]$/; 445 446 if ($save_as eq "DSpace") { 447 print $handle "$1\n"; 448 $filename = &util::filename_cat($working_dir, $doc_dir, $1); 449 &util::hard_link ($doc_obj->get_source_filename(), $filename); 450 } 451 297 452 foreach my $assoc_file (@{$doc_obj->get_assoc_files()}) { 298 453 my ($dir, $afile) = $assoc_file->[1] =~ /^(.*?)([^\/\\]+)$/; 299 454 $dir = "" unless defined $dir; 455 456 # Store the associated file to the "contents" file 457 if ($save_as eq "DSpace") { 458 print $handle "$assoc_file->[1]\n"; 459 } 460 300 461 if (-e 
$assoc_file->[0]) { 301 my $filepath = &util::filename_cat($self->{'archive_dir'}, $doc_dir, $afile); 302 &util::hard_link ($assoc_file->[0], $filepath); 462 $filename = &util::filename_cat($working_dir, $doc_dir, $afile); 463 464 &util::hard_link ($assoc_file->[0], $filename); 465 303 466 $doc_obj->add_utf8_metadata ($doc_obj->get_top_section(), 304 467 "gsdlassocfile", … … 318 481 { 319 482 my ($self) = @_; 483 my $service =$self->{'service'}; 320 484 321 485 # make sure that the handle has been opened - it won't be if we failed … … 331 495 if (exists($self->{'saveas'}) && $self->{'saveas'} eq "METS") { 332 496 $short_doc_file=$self->{'short_doc_file'}; 333 } els e{ # "GA"497 } elsif ($self->{'saveas'} eq "GA") { # "GA" 334 498 $short_doc_file=$self->{'gs_short_filename'}; 335 } 336 499 } else { # "DSpace" 500 } 501 337 502 if ($self->{'gzip'}) { 338 503 my $doc_file = $self->{'gs_filename'}; … … 347 512 } 348 513 349 # store reference in the archive_info 350 $self->{'archive_info'}->add_info($OID, $short_doc_file); 351 514 # store reference in the archive_info and export_infor 515 if ($service eq "import") { 516 $self->{'archive_info'}->add_info($OID, $short_doc_file); 517 } elsif ($service eq "export") { 518 $self->{'export_info'}->add_info($OID, $short_doc_file); 519 } else { 520 return; 521 } 352 522 return 1; 353 523 } … … 386 556 my $self = shift(@_); 387 557 my ($handle, $OID) = @_; 558 388 559 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n"; 389 560 print $handle '<!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">' . "\n"; … … 397 568 } 398 569 570 sub output_dc_xml_header(){ 571 my $self = shift(@_); 572 my ($handle, $OID) = @_; 573 574 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n"; 575 # print $handle '<!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">' . "\n"; 576 print $handle '<dublin_core>' . 
"\n"; 577 } 578 579 sub output_dc_xml_footer() { 580 my $self = shift(@_); 581 my ($handle) = @_; 582 print $handle '</dublin_core>' . "\n"; 583 } 399 584 1; 400 401 402
Note: See TracChangeset for help on using the changeset viewer.