Changeset 21306
- Timestamp:
- 2009-12-09T13:18:41+13:00 (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/inexport.pm
r20788 r21306 33 33 use GDBMUtils; 34 34 35 sub src_db_file { 36 my ($archivedir) = @_; 37 return &util::filename_cat ($archivedir, "archiveinf-src.gdb"); 38 } 39 40 sub doc_db_file { 41 my ($archivedir) = @_; 42 return &util::filename_cat ($archivedir, "archiveinf-doc.gdb"); 43 } 44 45 sub oid_count_file { 46 my ($archivedir) = @_; 47 return &util::filename_cat ($archivedir, "OIDcount"); 48 } 49 35 50 36 51 sub prime_doc_oid_count 37 52 { 38 53 my ($archivedir) = @_; 39 my $oid_count_filename = & util::filename_cat ($archivedir, "OIDcount");54 my $oid_count_filename = &oid_count_file($archivedir); 40 55 41 56 if (-e $oid_count_filename) { … … 62 77 63 78 my ($archivedir) = @_; 64 my $oid_count_filename = & util::filename_cat ($archivedir, "OIDcount");79 my $oid_count_filename = &oid_count_file($archivedir); 65 80 66 81 … … 82 97 83 98 # in this method, we want to know if metadata files are modified or not. 84 my $doc_db = "archiveinf-doc.gdb"; 85 my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db); 99 my $arcinfo_doc_filename = &doc_db_file($archivedir); 86 100 87 101 my $archiveinf_timestamp = -M $arcinfo_doc_filename; … … 174 188 175 189 190 191 176 192 # Deal with complication of new or modified metadata files by forcing 177 193 # everything from this point down in the file hierarchy to … … 199 215 push(@$reindex_files,$existing_f); 200 216 $block_hash->{'reindex_files'}->{$existing_f} = 1; 201 202 } 203 } 204 205 # Reindexing is accomplished by putting them in the list for reindexing (line above) 206 # and then tagging the arcinfo version as to be deleted. 207 208 _mark_docs_for_deletion($archive_info,$block_hash,$reindex_files,$archivedir,$verbosity, "reindex"); 217 delete $block_hash->{'existing_files'}->{$existing_f}; 218 219 } 220 } 209 221 210 222 # metadata file needs to be in new_files list so parsed by MetadataXMLPlug 211 223 # (or equivalent) 212 224 $block_hash->{'new_files'}->{$new_mdf} = 1; 225 226 } 227 228 # go through remaining existing files and work out what has changed and needs to be reindexed. 229 my @existing_files = sort keys %{$block_hash->{'existing_files'}}; 230 231 my $reindex_files = []; 232 233 foreach my $existing_filename (@existing_files) { 234 if (-M $existing_filename < $archiveinf_timestamp) { 235 # file is newer than last build 236 237 my $existing_file = $existing_filename; 238 #my $collectdir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}); 239 240 #my $collectdir_resafe = &util::filename_to_regex($collectdir); 241 #$existing_file =~ s/^$collectdir_resafe(\\|\/)?//; 242 243 print STDERR "**** Reindexing existing file: $existing_file\n"; 244 245 push(@$reindex_files,$existing_file); 246 $block_hash->{'reindex_files'}->{$existing_filename} = 1; 247 } 213 248 214 249 } … … 249 284 } 250 285 251 # not used anymore 252 sub is_assoc_file 286 287 # this is used to delete "deleted" docs, and to remove old versions of "changed" docs 288 # $mode is 'delete' or 'reindex' 289 sub mark_docs_for_deletion 253 290 { 254 my ($file,$doc_rec) = @_; 255 256 my ($file_root,$dirname,$suffix) = fileparse($file, "\\.[^\\.]+\$"); 257 258 foreach my $af (@{$doc_rec->{'assoc-file'}}) { 259 my $full_af = &util::filename_cat($dirname,$af); 260 261 return 1 if ($full_af eq $file); 262 } 263 264 return 0; 265 } 266 267 268 269 # this is used to delete "deleted" docs, and to remove old versions of "changed" docs 270 sub _mark_docs_for_deletion 271 { 272 my ($archive_info,$block_hash,$deleted_files,$archivedir,$verbosity,$mode_text) = @_; 273 274 my $doc_db = "archiveinf-doc.gdb"; 275 my $src_db = "archiveinf-src.gdb"; 276 my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db); 277 my $arcinfo_src_filename = &util::filename_cat ($archivedir, $src_db); 291 my ($archive_info,$block_hash,$deleted_files,$archivedir,$verbosity,$mode) = @_; 292 293 my $mode_text = "deleted from index"; 294 if ($mode eq "reindex") { 295 $mode_text = "reindexed"; 296 } 297 my $arcinfo_doc_filename = &doc_db_file($archivedir); 298 my $arcinfo_src_filename = &src_db_file($archivedir); 278 299 279 300 … … 321 342 } 322 343 323 sub mark_docs_for_deletion324 {325 _mark_docs_for_deletion(@_,"deleted from index");326 }327 328 329 sub mark_docs_for_reindex330 {331 my ($archive_info,$block_hash,$archivedir,$verbosity) = @_;332 333 # Reindexing is accomplished by deleting the previously indexed334 # version of the document, and then allowing the new version to335 # be indexed (as would a new document be indexed).336 #337 # The first step (marking for deletion) is implemented by this routine.338 #339 # By default in Greenstone a new version of an index will hash to340 # a new unique OID, and the above strategy of reindex=delete+add341 # works fine. A special case arises when a persistent OID is342 # allocated to a document (for instance through a metadata field),343 # and the second step to reindexing (see XXXX) detects this and344 # deals with it appropriately.345 346 my @existing_files = sort keys %{$block_hash->{'existing_files'}};347 348 my $doc_db = "archiveinf-doc.gdb";349 my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db);350 351 my $archiveinf_timestamp = -M $arcinfo_doc_filename;352 353 my $reindex_files = [];354 355 foreach my $existing_filename (@existing_files) {356 if (-M $existing_filename < $archiveinf_timestamp) {357 # file is newer than last build358 359 my $existing_file = $existing_filename;360 #my $collectdir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'});361 362 #my $collectdir_resafe = &util::filename_to_regex($collectdir);363 #$existing_file =~ s/^$collectdir_resafe(\\|\/)?//;364 365 print STDERR "**** Reindexing existing file: $existing_file\n";366 367 push(@$reindex_files,$existing_file);368 $block_hash->{'reindex_files'}->{$existing_filename} = 1;369 }370 371 }372 373 _mark_docs_for_deletion($archive_info,$block_hash,$reindex_files,$archivedir,$verbosity, "reindex");374 375 }376 377 344 378 345
Note:
See TracChangeset
for help on using the changeset viewer.