Changeset 20776 for gsdl/trunk
- Timestamp: 2009-10-05T15:50:20+13:00 (15 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/inexport.pm
r20769 r20776 80 80 { 81 81 my ($archive_info,$block_hash,$importdir,$archivedir,$verbosity,$incremental_mode) = @_; 82 83 # in this method, we want to know if metadata files are modified or not. 84 my $doc_db = "archiveinf-doc.gdb"; 85 my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db); 86 87 my $archiveinf_timestamp = -M $arcinfo_doc_filename; 82 88 83 89 # First convert all files to absolute form … … 108 114 109 115 # entry in 'all_files' is moved to either 'existing_files', 110 # 'deleted_files', 'new_files', or 'new_ metadata_files'116 # 'deleted_files', 'new_files', or 'new_or_modified_metadata_files' 111 117 112 118 if (!&util::filename_is_absolute($curr_file)) { … … 115 121 } 116 122 117 if (defined $block_hash->{'file_blocks'}->{$full_curr_file}) {118 # If in block list, we want to ignore it119 delete $block_hash->{'all_files'}->{$curr_file};120 121 if (defined $full_prev_all_files->{$full_curr_file}) {122 # also make sure it is gone from 'previous' list so123 # not mistaken for a file that needs to be deleted124 delete $full_prev_all_files->{$full_curr_file};125 }126 next;127 }128 129 123 # figure out if new file or not 130 124 if (defined $full_prev_all_files->{$full_curr_file}) { 125 # delete it so that only files that need deleting are left 126 delete $full_prev_all_files->{$full_curr_file}; 127 128 # had it before. is it a metadata file? 129 if ($block_hash->{'metadata_files'}->{$full_curr_file}) { 131 130 132 if ($incremental_mode eq "all") { 133 134 # had it before 135 $block_hash->{'existing_files'}->{$full_curr_file} = 1; 136 137 # Now remove it, so by end of loop only the files 138 # that need deleting are left 139 140 delete $full_prev_all_files->{$full_curr_file}; 131 # is it modified?? 
132 if (-M $full_curr_file < $archiveinf_timestamp) { 133 print STDERR "*** Detected a modified metadata file: $full_curr_file\n" if $verbosity > 2; 134 # its newer than last build 135 $block_hash->{'new_or_modified_metadata_files'}->{$full_curr_file} = 1; 136 } 141 137 } 142 138 else { 143 # Warning in "onlyadd" mode, but had it before! 144 print STDERR "Warning: File $full_curr_file previously imported.\n"; 145 print STDERR " Treating as new file\n"; 146 147 $block_hash->{'new_files'}->{$full_curr_file} = 1; 148 delete $full_prev_all_files->{$full_curr_file}; 139 if ($incremental_mode eq "all") { 140 141 # had it before 142 $block_hash->{'existing_files'}->{$full_curr_file} = 1; 143 144 } 145 else { 146 # Warning in "onlyadd" mode, but had it before! 147 print STDERR "Warning: File $full_curr_file previously imported.\n"; 148 print STDERR " Treating as new file\n"; 149 150 $block_hash->{'new_files'}->{$full_curr_file} = 1; 151 152 } 149 153 } 150 154 } … … 157 161 # Greenstone to always be this) 158 162 159 ## print STDERR "***** Detected new metadata file: $full_curr_file\n";160 $block_hash->{'new_ metadata_files'}->{$full_curr_file} = 1;163 print STDERR "***** Detected new metadata file: $full_curr_file\n" if $verbosity > 2; 164 $block_hash->{'new_or_modified_metadata_files'}->{$full_curr_file} = 1; 161 165 } 162 166 else { … … 170 174 171 175 172 # Deal with complication of new metadata.xmlfiles by forcing176 # Deal with complication of new or modified metadata files by forcing 173 177 # everything from this point down in the file hierarchy to 174 178 # be freshly imported. … … 178 182 # associated with the relevant document(s). 
179 183 180 foreach my $new_mdf (keys %{$block_hash->{'new_ metadata_files'}}) {184 foreach my $new_mdf (keys %{$block_hash->{'new_or_modified_metadata_files'}}) { 181 185 my ($fileroot,$situated_dir,$ext) = fileparse($new_mdf, "\\.[^\\.]+\$"); 182 186 … … 199 203 } 200 204 201 # Reindexing is accomplished by putting them in th list for reindexing (line above)205 # Reindexing is accomplished by putting them in the list for reindexing (line above) 202 206 # and then tagging the arcinfo version as to be deleted. 203 207 … … 279 283 my $src_rec = GDBMUtils::gdbmRecordToHash($arcinfo_src_filename,$file); 280 284 my $oids = $src_rec->{'oid'}; 285 my $file_record_deleted = 0; 281 286 foreach my $oid (@$oids) { 282 287 # Find out if it's a main doc, assoc file, or metadata 283 288 284 289 my $doc_rec = GDBMUtils::gdbmRecordToHash($arcinfo_doc_filename,$oid); 290 my $doc_source_file = $doc_rec->{'src-file'}->[0]; 291 if (!&util::filename_is_absolute($doc_source_file)) { 292 $doc_source_file = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$doc_source_file); 293 } 285 294 286 295 if (is_assoc_file($file,$doc_rec)) { 296 ## -- kjdon - here, do same thing as for metadata file?? 297 ## mark source for reimport?? 
287 298 # assoc file => mark it for re-indexing (safest thing to do) 288 299 my $curr_status = $archive_info->get_status_info($oid); 289 300 290 301 # mark source doc for reimport as one of its assoc files has changed or deleted 302 $block_hash->{'reindex_files'}->{$doc_source_file} = 1; 291 303 if (defined($curr_status) && (($curr_status ne "D") && ($curr_status ne "R"))) { 292 304 if ($verbosity > 1) { … … 300 312 &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 301 313 } 302 GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file); 314 GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file) unless $file_record_deleted; 315 $file_record_deleted = 1; 303 316 } 304 317 else { 305 318 # either src-file or metadata.xml file linking to src-file 306 307 my $src_file;319 # actually, metadata files should not get here, as are 320 # processed earlier 308 321 309 322 if ($doc_rec->{'src-file'}->[0] ne $file) { 310 323 # it's a metadata file attached to this OID 311 # => workout the src-file it matches to 312 313 $src_file = $doc_rec->{'src-file'}->[0]; 324 # => reindex the src-file it matches to 325 326 $block_hash->{'reindex_files'}->{$doc_source_file} = 1; 327 328 # remove the metadata file from the src-database 314 329 315 my $src_filename = $src_file; 316 if (!&util::filename_is_absolute($src_file)) { 317 $src_filename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$src_file); 318 } 319 320 $block_hash->{'reindex_files'}->{$src_filename} = 1; 321 322 # remove the metadata file from the src-database 323 GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file); 324 } 325 else { 326 # It's the main doc 327 # => make it the target and mark it for deletion 328 $src_file = $file; 330 GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file) unless $file_record_deleted; 331 $file_record_deleted = 1; 329 332 } 330 333 … … 336 339 if ($index_status ne "D") { 337 340 if ($verbosity>1) { 338 print STDERR "$oid ($ src_file) marked to be $mode_text on next 
buildcol.pl\n";341 print STDERR "$oid ($doc_source_file) marked to be $mode_text on next buildcol.pl\n"; 339 342 } 340 343 $archive_info->set_status_info($oid,"D"); … … 343 346 &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 344 347 345 GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$ src_file);348 GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$doc_source_file); 346 349 } 347 350
Note: See TracChangeset for help on using the changeset viewer.