Changeset 18469 for gsdl/trunk/perllib/inexport.pm
- Timestamp:
- 2009-02-06T18:19:44+13:00 (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/inexport.pm
r18457 r18469 27 27 28 28 use strict; 29 30 use File::Basename; 29 31 30 32 use util; … … 53 55 } 54 56 57 55 58 # Figure out which are the new files, existing files and so 56 59 # by implication the files from the previous import that are not … … 68 71 } 69 72 73 if (defined $block_hash->{'file_blocks'}->{$full_curr_file}) { 74 # If in block list, we want to ignore it 75 delete $block_hash->{'all_files'}->{$curr_file}; 76 77 if (defined $full_prev_all_files->{$full_curr_file}) { 78 # also make sure it is gone from 'previous' list so 79 # not mistaken for a file that needs to be deleted 80 delete $full_prev_all_files->{$full_curr_file}; 81 } 82 next; 83 } 84 70 85 # figure of if new file or not 71 86 if (defined $full_prev_all_files->{$full_curr_file}) { 87 72 88 # had it before 73 $block_hash->{'existing_files'}->{$curr_file} = 1; 89 $block_hash->{'existing_files'}->{$full_curr_file} = 1; 90 74 91 # Now remove it, so by end of loop only the files 75 92 # that need deleting are left … … 78 95 } 79 96 else { 80 $block_hash->{'new_files'}->{$ curr_file} = 1;97 $block_hash->{'new_files'}->{$full_curr_file} = 1; 81 98 } 82 99 … … 84 101 } 85 102 86 # By this point full_prev_all_files contains only the files 87 # that are not in the current import folder => i.e. files 88 # to be deleted 103 # By this point full_prev_all_files contains the files 104 # mentioned in archiveinf-src.db but are not in the 'import' 105 # folder (or whatever was specified through -importdir ...) 106 107 # This list can contain files that were created in the 'tmp' or 108 # 'cache' areas (such as screen-size and thumbnail images). 89 109 # 90 # The value in each key is its "local" import file name, which is what 91 # we want to use 110 # In building the final list of files to delete, we test to see if 111 # it exists on the filesystem and if it does (unusual for a file 112 # that's allegedly deleted!) , supress it from going into the final 113 # list 114 115 my $collectdir = $ENV{'GSDLCOLLECTDIR'}; 116 92 117 my @deleted_files = values %$full_prev_all_files; 93 map { $block_hash->{'deleted_files'}->{$_} = 1 } @deleted_files; 118 map { my $curr_file = $_; 119 my $full_curr_file = $curr_file; 120 121 if (!&util::filename_is_absolute($curr_file)) { 122 # add in import dir to make absolute 123 124 $full_curr_file = &util::filename_cat($collectdir,$curr_file); 125 } 126 127 128 if (!-e $full_curr_file) { 129 $block_hash->{'deleted_files'}->{$curr_file} = 1; 130 } 131 } @deleted_files; 94 132 } 95 133 … … 110 148 # this file is used in (note in most cases, it's just one OID) 111 149 112 # An improvement would be to have the record read113 # into a hash array114 150 my $src_rec = GDBMUtils::gdbmRecordToHash($arcinfo_src_filename,$file); 115 151 my $oids = $src_rec->{'oid'}; 116 152 foreach my $oid (@$oids) { 117 153 118 # find out if it's an assoc file or main doc154 # Find out if it's an assoc file or main doc 119 155 120 156 my $doc_rec = GDBMUtils::gdbmRecordToHash($arcinfo_doc_filename,$oid); 121 ## print STDERR "file = $file\n";122 123 157 if ($doc_rec->{'src-file'}->[0] eq $file) { 124 # mark it for deletion 158 # It's the main doc 159 # => mark it for deletion 160 125 161 if ($verbosity>1) { 126 print STDERR "$oid marked to be deleted \n";162 print STDERR "$oid marked to be deleted from index on next buildcol.pl\n"; 127 163 } 128 164 $archive_info->set_status_info($oid,"D"); 129 165 130 166 my $val = &GDBMUtils::gdbmDatabaseGet($arcinfo_doc_filename,$oid); 131 $val =~ s/^<index-status>(.*)$/<index-status>D/m; 132 &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 167 my ($index_status) = ($val =~ m/^<index-status>(.*)$/m); 168 if ($index_status ne "D") { 169 $val =~ s/^<index-status>(.*)$/<index-status>D/m; 170 &GDBMUtils::gdbmDatabaseSet($arcinfo_doc_filename,$oid,$val); 171 my $doc_file = $doc_rec->{'doc-file'}->[0]; 172 173 my $doc_filename = &util::filename_cat($archivedir,$doc_file); 174 175 176 my ($doc_tailname, $doc_dirname, $suffix) 177 = File::Basename::fileparse($doc_filename, "\\.[^\\.]+\$"); 178 179 print STDERR "Removing $doc_dirname\n" if ($verbosity>2); 180 181 &util::rm_r($doc_dirname); 182 183 } 133 184 } 134 185 else { … … 149 200 } 150 201 } 202 203 GDBMUtils::gdbmDatabaseRemove($arcinfo_src_filename,$file); 151 204 } 152 205 } … … 155 208 156 209 210 sub mark_docs_for_reindex 211 { 212 my ($archive_info,$existing_files_ref,$archivedir,$verbosity) = @_; 213 214 # Reindexing is accomplished by deleting the previously indexed 215 # version of the document, and then allowing the new version to 216 # be indexed (as would a new document be indexed). 217 # 218 # The first step (marking for deletion) is implemented by this routine. 219 # 220 # By default in Greenstone a new version of an index will hash to 221 # a new unique OID, and the above strategy of reindex=delete+add 222 # works fine. A special case arises when a persistent OID is 223 # allocated to a document (for instance through a metadata field), 224 # and the second step to reindexing (see XXXX) detects this and 225 # deals with it appropriately. 226 227 my $db_ext = &util::is_little_endian() ? ".ldb" : ".bdb"; 228 my $doc_db = "archiveinf-doc$db_ext"; 229 my $arcinfo_doc_filename = &util::filename_cat ($archivedir, $doc_db); 230 231 232 my $archiveinf_timestamp = -M $arcinfo_doc_filename; 233 234 my $reindex_files_ref = []; 235 236 foreach my $existing_filename (@$existing_files_ref) { 237 238 if (-M $existing_filename < $archiveinf_timestamp) { 239 # file is newer than last build 240 241 my $existing_file = $existing_filename; 242 my $collectdir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}); 243 244 $existing_file =~ s/^$collectdir(\\|\/)?//; 245 246 print STDERR "**** Deleting existing file: $existing_file\n"; 247 248 push(@$reindex_files_ref,$existing_file); 249 } 250 251 } 252 253 mark_docs_for_deletion($archive_info,$reindex_files_ref,$archivedir,$verbosity); 254 255 return @$reindex_files_ref; 256 } 257 258 259 157 260 1;
Note:
See TracChangeset
for help on using the changeset viewer.