Changeset 22037
- Timestamp: 2010-05-05T14:53:53+12:00 (13 years ago)
- Location: main/trunk/greenstone2
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/bin/script/export.pl
r21664 r22037 488 488 print STDERR "<export>\n" if $gli; 489 489 490 my $manifest_lookup = new manifest( );490 my $manifest_lookup = new manifest($collectcfg->{'infodbtype'},$archivedir); 491 491 if ($manifest ne "") { 492 492 my $manifest_filename = $manifest; -
main/trunk/greenstone2/bin/script/import.pl
r22011 r22037 491 491 print STDERR "<Import>\n" if $gli; 492 492 493 my $manifest_lookup = new manifest( );493 my $manifest_lookup = new manifest($collectcfg->{'infodbtype'},$archivedir); 494 494 if ($manifest ne "") { 495 495 my $manifest_filename = $manifest; … … 617 617 $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area); 618 618 $collect_tmp_area = &util::filename_to_regex($collect_tmp_area); 619 620 619 621 620 foreach my $df (@deleted_files) { 622 621 next if ($df =~ m/^$gsdl_tmp_area/); … … 624 623 625 624 push(@filtered_deleted_files,$df); 626 } 625 } 627 626 628 627 629 628 @deleted_files = @filtered_deleted_files; 630 629 … … 661 660 else 662 661 { 663 # process any files marked for importing 664 foreach my $file (keys %{$manifest_lookup->{'import'}}) { 665 &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli); 666 } 662 # 663 # 1. Process delete files first 664 # 667 665 668 666 my @deleted_files = keys %{$manifest_lookup->{'delete'}}; 669 667 my @full_deleted_files = (); 670 668 671 foreach my $df (@deleted_files) { 672 my $full_df = &util::filename_cat($importdir,$df); 669 # ensure all filenames are absolute 670 foreach my $df (@deleted_files) { 671 my $full_df = 672 (&util::filename_is_absolute($df)) 673 ? $df 674 : &util::filename_cat($importdir,$df); 675 673 676 push(@full_deleted_files,$full_df); 674 677 } 675 678 679 &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_deleted_files); 676 680 &inexport::mark_docs_for_deletion($archive_info,{}, 677 681 \@full_deleted_files, 678 682 $archivedir, $verbosity, "delete"); 683 684 685 # 686 # 2. Now files for reindexing 687 # 688 689 my @reindex_files = keys %{$manifest_lookup->{'reindex'}}; 690 my @full_reindex_files = (); 691 692 # ensure all filenames are absolute 693 foreach my $rf (@reindex_files) { 694 my $full_rf = 695 (&util::filename_is_absolute($rf)) 696 ? 
$rf 697 : &util::filename_cat($importdir,$rf); 698 699 push(@full_reindex_files,$full_rf); 700 } 701 702 &plugin::remove_some($pluginfo, $collectcfg->{'infodbtype'}, $archivedir, \@full_reindex_files); 703 &inexport::mark_docs_for_deletion($archive_info,{},\@full_reindex_files, $archivedir,$verbosity, "reindex"); 704 705 # And now ensure the new version of the file processed by appropriate 706 # plugin 707 foreach my $full_rf (@full_reindex_files) { 708 &plugin::read ($pluginfo, "", $full_rf, {}, {}, $processor, $maxdocs, 0, $gli); 709 } 710 711 712 # 713 # 3. Now finally any new files 714 # 715 716 foreach my $file (keys %{$manifest_lookup->{'index'}}) { 717 &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli); 718 } 719 720 679 721 } 680 722 -
main/trunk/greenstone2/perllib/manifest.pm
r18441 r22037 1 1 package manifest; 2 2 3 use XMLParser;4 3 use strict; 5 4 no strict 'refs'; # allow filehandles to be variables and viceversa 6 5 6 use XMLParser; 7 use dbutil; 8 7 9 our $self; 8 10 9 11 sub new { 10 12 my ($class) = shift (@_); 13 my ($infodbtype,$archivedir) = @_; 11 14 12 15 $self = {} ; … … 15 18 $self->{'reindex'} = {}; 16 19 $self->{'delete'} = {}; 20 21 my $arcinfo_doc_filename 22 = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir); 23 24 if (-e $arcinfo_doc_filename) { 25 # Only store the infodb-doc filename if it exists 26 # If it doesn't exist then this means the collection has not been 27 # built yet (or else the archives folder has been deleted). 28 # Either way we have no way to look up which files 29 # are associated with an OID. If we we encounter an OID 30 # tag later on, we will use the fact that this field is 31 # not defined to issue a warning 32 33 $self->{'_arcinfo-doc-filename'} = $arcinfo_doc_filename; 34 $self->{'_infodbtype'} = $infodbtype; 35 } 17 36 18 37 return bless $self, $class; … … 89 108 my ($expat, $element) = @_; 90 109 91 if ( $element eq "Filename")92 { 93 $self->{' filename'} = "";110 if (($element eq "Filename") || ($element eq "OID")) 111 { 112 $self->{'item-val'} = ""; 94 113 } 95 114 elsif ($element eq "Manifest") { … … 99 118 if (defined($self->{'file-type'})) 100 119 { 101 print STDERR "Warning: Malformed XML manifest ($element nested inside " . $self->{'file-type'} . ")\n"; 102 } 103 104 $self->{'file-type'} = $element; 120 print STDERR "Warning: Malformed XML manifest\n"; 121 print STDERR " Unrecognized element $element nested inside " . $self->{'file-type'} . 
".\n"; 122 } 123 else { 124 my $filetype = lc($element); 125 $self->{'file-type'} = $filetype; 126 if (!defined $self->{$filetype}) { 127 print STDERR "Warning: <$element> is not one of the registered tags for manifest format.\n"; 128 } 129 } 130 105 131 } 106 132 } … … 114 140 if ($element eq "Filename") 115 141 { 116 $self->{lc($self->{'file-type'})}->{$self->{'filename'}} = 1; 117 $self->{'filename'} = undef; 142 my $filetype = $self->{'file-type'}; 143 my $filename = $self->{'item-val'}; 144 145 $self->{$filetype}->{$filename} = 1; 146 $self->{'item-val'} = undef; 147 } 148 elsif ($element eq "OID") { 149 # look up src and assoc filenames used by this doc oid 150 151 my $filetype = $self->{'file-type'}; 152 my $oid = $self->{'item-val'}; 153 154 if (defined $self->{'_infodbtype'}) { 155 156 157 my $infodbtype = $self->{'_infodbtype'}; 158 my $arcinfo_doc_filename = $self->{'_arcinfo-doc-filename'}; 159 160 my $doc_rec_string = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $oid); 161 162 my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string); 163 164 my $doc_source_file = $doc_rec->{'src-file'}->[0]; 165 my $assoc_files = $doc_rec->{'assoc-file'}; 166 my @all_files = ($doc_source_file,@$assoc_files); 167 168 foreach my $filename (@all_files) { 169 170 if (!&util::filename_is_absolute($filename)) { 171 $filename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},$filename); 172 } 173 174 $self->{$filetype}->{$filename} = 1; 175 } 176 } 177 else { 178 print STDERR "Warning: No archiveinf-doc database in archives directory.\n"; 179 print STDERR " Unable to look up source files that constitute document $oid.\n"; 180 } 181 182 $self->{'item-val'} = undef; 118 183 } 119 184 else … … 129 194 my ($expat) = @_; 130 195 131 if (defined $self->{' filename'}) {196 if (defined $self->{'item-val'}) { 132 197 my $text = $_; 133 198 chomp($text); … … 136 201 $text =~ s/\s+$//; 137 202 138 $self->{' filename'} .= $text if ($text !~ m/^\s*$/);203 
$self->{'item-val'} .= $text if ($text !~ m/^\s*$/); 139 204 } 140 205 } … … 152 217 my ($expat) = @_; 153 218 219 if (defined $self->{'import'}) { 220 print STDERR "Warning: <Import> tag is deprecated.\n"; 221 print STDERR " Processing data as if it were tagged as <Index>\n"; 222 $self->{'index'} = $self->{'import'}; 223 } 224 154 225 } 155 226 -
main/trunk/greenstone2/perllib/plugin.pm
r21618 r22037 222 222 sub remove_some { 223 223 my ($pluginfo, $infodbtype, $archivedir, $deleted_files) = @_; 224 print STDERR "in remove some\n";225 224 return if (scalar(@$deleted_files)==0); 226 225 my $arcinfo_src_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-src", $archivedir);
Note: See TracChangeset for help on using the changeset viewer.