Changeset 8094
- Timestamp: 2004-09-02T11:09:20+12:00 (20 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/docsave.pm
r8079 r8094 27 27 # archives directory of a collection (as xml) 28 28 29 use strict; 30 no strict 'refs'; 29 31 30 32 package docsave; … … 38 40 39 41 sub BEGIN { 40 @ ISA = ('docproc');42 @docsave::ISA = ('docproc'); 41 43 } 42 44 … … 56 58 $self->{'gs_count'} = 0; 57 59 58 $self->{'outhandle'} = STDERR;60 $self->{'outhandle'} = 'STDERR'; 59 61 $self->{'outhandle'} = $outhandle if defined $outhandle; 60 62 # set a default for the archive directory … … 89 91 if ($self->{'groupsize'} > 1) { 90 92 $self->group_process ($doc_obj); 93 return; 94 } 95 96 # groupsize is 1 (i.e. one document per XML file) so sortmeta 97 # may be used 91 98 99 my $OID = $doc_obj->get_OID(); 100 $OID = "NULL" unless defined $OID; 101 102 # get document's directory 103 my $doc_dir = $self->get_doc_dir ($OID); 104 105 106 # copy all the associated files, add this information as metadata 107 # to the document 108 $self->process_assoc_files ($doc_obj, $doc_dir); 109 110 my $doc_file 111 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml"); 112 113 #***define doctxt.xml file 114 my $doc_txt_file 115 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir,"doctxt.xml"); 116 my $working_dir 117 =&util::filename_cat ($self->{'archive_dir'}, $doc_dir); 118 119 #***define docmets.xmlfile 120 my $doc_mets_file 121 = &util::filename_cat ($self->{'archive_dir'},$doc_dir, "docmets.xml"); 122 123 my $short_doc_file; 124 my $save_as = $self->{'saveas'} || "GA"; 125 if ($save_as eq "GA") { 126 $short_doc_file = util::filename_cat ($doc_dir, "doc.xml"); 127 } elsif ($save_as eq "METS") { 128 #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml"); 129 $short_doc_file=&util::filename_cat ($doc_dir, "docmets.xml"); 92 130 } else { 93 # groupsize is 1 (i.e. 
one document per XML file) so sortmeta 94 # may be used 131 return; 132 } 133 # save for later (for close_file_output()) 134 $self->{'short_doc_file'}=$short_doc_file; 135 136 if ($save_as eq "GA") { 137 if (!open (OUTDOC, ">$doc_file")) { 138 print $outhandle "docsave::process could not write to file $doc_file\n"; 139 return; 140 } 141 142 # save this document 143 $self->output_xml_header('docsave::OUTDOC'); 144 $doc_obj->output_section('docsave::OUTDOC', 145 $doc_obj->get_top_section()); 146 $self->output_xml_footer('docsave::OUTDOC'); 147 148 close OUTDOC; 149 } elsif ($save_as eq "METS") { 150 # save the document without metadata:doctxt.xml 151 152 if (!open(OUTDOC_TXT, ">$doc_txt_file")){ 153 print $outhandle "docsave::process could not write to file $doc_mets_file\n"; 154 return; 155 } 156 157 $self->output_txt_xml_header('docsave::OUTDOC_TXT'); 158 $doc_obj->output_txt_section('docsave::OUTDOC_TXT', $doc_obj->get_top_section()); 159 #$self->output_txt_xml_footer('docsave::OUTDOC_TXT'); 95 160 96 my $OID = $doc_obj->get_OID(); 97 $OID = "NULL" unless defined $OID; 98 99 100 # get document's directory 101 my $doc_dir = $self->get_doc_dir ($OID); 102 103 104 # copy all the associated files, add this information as metadata 105 # to the document 106 $self->process_assoc_files ($doc_obj, $doc_dir); 107 108 my $doc_file 109 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml"); 110 111 #***define doctxt.xml file 112 my $doc_txt_file 113 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir,"doctxt.xml"); 114 my $working_dir 115 =&util::filename_cat ($self->{'archive_dir'}, $doc_dir); 116 117 #***define docmets.xmlfile 118 my $doc_mets_file 119 = &util::filename_cat ($self->{'archive_dir'},$doc_dir, "docmets.xml"); 120 121 my $short_doc_file = &util::filename_cat ($doc_dir, "doc.xml"); 122 #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml"); 123 my $short_mets_doc_file=&util::filename_cat ($doc_dir, "docmets.xml"); 124 125 my 
$save_as = $self->{'saveas'}; 126 127 if ($save_as eq "GA") { 128 if (!open (OUTDOC, ">$doc_file")) { 129 print $outhandle "docsave::process could not write to file $doc_file\n"; 130 return; 131 } 161 # Convert doctxt.xml file to docmets.xml 162 if (!open(OUTDOC_METS,">$doc_mets_file")){ 163 print $outhandle "docsave::process could not write to file $doc_mets_file\n"; 164 return; 165 } 132 166 133 # save this document 134 $self->output_xml_header('docsave::OUTDOC'); 135 $doc_obj->output_section('docsave::OUTDOC', $doc_obj->get_top_section()); 136 $self->output_xml_footer('docsave::OUTDOC'); 137 138 close OUTDOC; 139 } 140 elsif ($save_as eq "METS") { 141 # save the document without metadata:doctxt.xml 142 143 if (!open(OUTDOC_TXT, ">$doc_txt_file")){ 144 print $outhandle "docsave::process could not write to file $doc_mets_file\n"; 145 return; 146 } 147 148 $self->output_txt_xml_header('docsave::OUTDOC_TXT'); 149 $doc_obj->output_txt_section('docsave::OUTDOC_TXT', $doc_obj->get_top_section()); 150 #$self->output_txt_xml_footer('docsave::OUTDOC_TXT'); 151 152 # Convert doctxt.xml file to docmets.xml 153 if (!open(OUTDOC_METS,">$doc_mets_file")){ 154 print $outhandle "docsave::process could not write to file $doc_mets_file\n"; 155 return; 156 } 157 158 $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID); 159 $doc_obj->output_mets_section('docsave::OUTDOC_METS',$doc_obj->get_top_section(), $working_dir); 160 $self->output_mets_xml_footer('docsave::OUTDOC_METS'); 161 162 close OUTDOC_TXT; 163 close OUTDOC_METS; 164 } 165 else { 166 print $outhandle "docsave::process unrecognised saveas type, $save_as\n"; 167 return; 168 } 169 170 if ($self->{'gzip'}) { 171 my $doc_file = $self->{'gs_filename'}; 172 `gzip $doc_file`; 173 $doc_file .= ".gz"; 174 $short_doc_file .= ".gz"; 175 if (!-e $doc_file) { 176 print $outhandle "error while gzipping: $doc_file doesn't exist\n"; 177 return 0; 178 } 179 } 180 181 # do the sortmeta thing 182 my ($metadata); if (defined 
($self->{'sortmeta'})) { 183 $metadata = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'sortmeta'}); 184 } 185 186 # store reference in the archive_info 187 if ($self->{'saveas'} eq "METS"){ 188 $self->{'archive_info'}->add_info($OID, $short_mets_doc_file, $metadata); 189 } else { 190 $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata); 191 } 192 } 167 $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID); 168 $doc_obj->output_mets_section('docsave::OUTDOC_METS', 169 $doc_obj->get_top_section(), 170 $working_dir); 171 $self->output_mets_xml_footer('docsave::OUTDOC_METS'); 172 173 close OUTDOC_TXT; 174 close OUTDOC_METS; 175 } else { # save_as isn't GA or METS 176 print $outhandle "docsave::process unrecognised saveas type, $save_as\n"; 177 return; 178 } 179 180 if ($self->{'gzip'}) { 181 my $doc_file = $self->{'gs_filename'}; 182 `gzip $doc_file`; 183 $doc_file .= ".gz"; 184 $short_doc_file .= ".gz"; 185 if (!-e $doc_file) { 186 print $outhandle "error while gzipping: $doc_file doesn't exist\n"; 187 return 0; 188 } 189 } 190 191 # do the sortmeta thing 192 my ($metadata); if (defined ($self->{'sortmeta'})) { 193 $metadata = $doc_obj->get_metadata_element($doc_obj->get_top_section(), 194 $self->{'sortmeta'}); 195 } 196 197 # store reference in the archive_info 198 $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata); 193 199 } 194 200 … … 289 295 290 296 my @assoc_files = (); 291 foreach $assoc_file (@{$doc_obj->get_assoc_files()}) {297 foreach my $assoc_file (@{$doc_obj->get_assoc_files()}) { 292 298 my ($dir, $afile) = $assoc_file->[1] =~ /^(.*?)([^\/\\]+)$/; 293 299 $dir = "" unless defined $dir; … … 321 327 322 328 my $OID = $self->{'gs_OID'}; 323 my $short_doc_file = $self->{'gs_short_filename'}; 329 my $short_doc_file; 330 # can we use 'short_doc_file' for GA too? 
331 if (exists($self->{'saveas'}) && $self->{'saveas'} eq "METS") { 332 $short_doc_file=$self->{'short_doc_file'}; 333 } else { # "GA" 334 $short_doc_file=$self->{'gs_short_filename'}; 335 } 324 336 325 337 if ($self->{'gzip'}) { … … 336 348 337 349 # store reference in the archive_info 338 if ($self->{'saveas'} eq "METS"){ 339 $self->{'archive_info'}->add_info($OID, $short_mets_doc_file); 340 } else { 341 $self->{'archive_info'}->add_info($OID, $short_doc_file); 342 } 350 $self->{'archive_info'}->add_info($OID, $short_doc_file); 343 351 344 352 return 1;
Note: See TracChangeset for help on using the changeset viewer.