- Timestamp:
- 2023-01-18T15:22:41+13:00 (15 months ago)
- Location:
- main/trunk/greenstone2/perllib
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/inexport.pm
r36471 r37152 47 47 use parse2; 48 48 49 use DocHistoryFileUtils; 50 use FileUtils; 51 49 52 use File::Basename; 50 53 … … 121 124 'reqd' => "no", 122 125 'hiddengli' => "yes" }, 126 { 'name' => "replaceold", 127 'desc' => "{import.replaceold}", 128 'type' => "flag", 129 'reqd' => "no", 130 'hiddengli' => "yes" }, 123 131 { 'name' => "removeold", 124 132 'desc' => "{import.removeold}", … … 428 436 $archivedir = &FileUtils::sanitizePath($archivedir); 429 437 } 438 439 my $archivedir_keepold = "${archivedir}_keepold"; # used when file-level document-version history is in play 430 440 $self->{'archivedir'} = $archivedir; 431 441 $self->{'archivedir_keepold'} = $archivedir_keepold; 442 432 443 if (defined $self->{'default_verbosity'}) { 433 444 if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { … … 478 489 my $checkdir = ($inexport_mode eq "import") ? "archives" : "export"; 479 490 480 my ($removeold, $keepold, $incremental, $incremental_mode) 481 = &scriptutil::check_removeold_and_keepold($self->{'removeold'}, $self->{'keepold'}, 482 483 491 my ($removeold, $keepold, $replaceold, $incremental, $incremental_mode) 492 = &scriptutil::check_removeold_keepold_replaceold($self->{'removeold'}, $self->{'keepold'}, $self->{'replaceold'}, 493 $self->{'incremental'}, $checkdir, 494 $collectcfg); 484 495 485 496 $self->{'removeold'} = $removeold; 486 497 $self->{'keepold'} = $keepold; 498 $self->{'replaceold'} = $replaceold; 487 499 $self->{'incremental'} = $incremental; 488 500 $self->{'incremental_mode'} = $incremental_mode; … … 507 519 508 520 my $importdir = $self->{'importdir'}; 509 my $archivedir = $self->{'archivedir'} || $self->{'exportdir'}; 521 my $archivedir = $self->{'archivedir'} || $self->{'exportdir'}; 522 # 'archivedir' is a tad abused, and is sometimes set to the 'exportdir' value, 523 # meaning 'archivedir_keepold' is actually the export dir name with '_keepold' appended 524 my $archivedir_keepold = $self->{'archivedir_keepold'}; 
510 525 511 526 my $incremental = $self->{'incremental'}; … … 515 530 516 531 my $removeold = $self->{'removeold'}; 532 my $replaceold = $self->{'replaceold'}; 517 533 my $keepold = $self->{'keepold'}; 518 534 … … 551 567 } 552 568 553 my $manifest_lookup = new manifest($collectcfg->{'infodbtype'},$archivedir); 569 my $manifest_lookup = new manifest($collectcfg->{'infodbtype'},$archivedir); 554 570 if ($self->{'manifest'} ne "") { 555 571 my $manifest_filename = $self->{'manifest'}; … … 590 606 } 591 607 592 # remove the old contents of the archives directory (and tmp 593 # directory) if needed 594 595 if ($removeold) { 608 # Whether -removeold, -keepold or -replaceold there should never be an existing archivedir_keepold 609 # => Taken to be a sign of a previous import/export that has gone wrong 610 # => Print out error message and stop! 611 612 if (&FileUtils::directoryExists($archivedir_keepold)) { 613 my $rkr_old_minus_option = undef; # rkr = remove, keep, replace (whichever one is being used) 614 if ($removeold) { 615 $rkr_old_minus_option = "-removeold"; 616 } 617 elsif ($keepold) { 618 $rkr_old_minus_option = "-keepold"; 619 } 620 elsif ($replaceold) { 621 $rkr_old_minus_option = "-replaceold"; 622 } 623 624 &gsprintf(STDERR, "\n"); 625 &gsprintf(STDERR, "Detected existing directory:\n\n"); 626 &gsprintf(STDERR, " $archivedir_keepold\n\n"); 627 &gsprintf(STDERR, "Stopping $inexport_mode.\n\n"); 628 629 &gsprintf(STDERR, "**** When building with $rkr_old_minus_option, there cannot be a pre-existing 'archives_keepold' directory\n"); 630 &gsprintf(STDERR, "****\n"); 631 &gsprintf(STDERR, "**** Review your collection directory folder, and determine whether to:\n"); 632 &gsprintf(STDERR, "**** (a) move your 'archives_keepold' back to being 'archives'; or\n"); 633 &gsprintf(STDERR, "**** (b) remove your 'archives_keepold'\n"); 634 &gsprintf(STDERR, "**** before running your $inexport_mode command again\n\n"); 635 636 exit 1; # c errno for 'operation not permitted' 
637 } 638 639 640 # remove the old contents of the archives directory (and tmp directory) if needed 641 642 if ($removeold) { 596 643 if (&FileUtils::directoryExists($archivedir)) { 597 644 &gsprintf($out, "{import.removing_archives}\n"); … … 605 652 &FileUtils::removeFilesRecursive($tmpdir); 606 653 } 654 } 655 else { 656 # If not $removeold, then must be $keepold or $replaceold 657 # => for either case want to "hard-link"/copy 'archives' to 'archives_keepold' 658 659 # Want to be super careful about doing this, so as not to accidentally 660 # wipe out any previous file-level document-version history 661 662 # If got to here, then there is no pre-existing $archivedir_keepold 663 # => Hard-link copy the contents of 'archives' to 'archives_keepold' 664 # => Stop if there is any issue with creating the hard-link copy 665 666 if (!&FileUtils::hardlinkFilesRefRecursive([$archivedir],$archivedir_keepold, { 'strict' => 1 } )) { 667 668 &gsprintf(STDERR, "\nError message: $!\n\n"); 669 670 &gsprintf(STDERR, "**** Failed to make a hard-link copy of:\n"); 671 &gsprintf(STDERR, "**** $archivedir\n"); 672 &gsprintf(STDERR, "**** to:\n"); 673 &gsprintf(STDERR, "**** $archivedir_keepold\n"); 674 &gsprintf(STDERR, "****\n"); 675 &gsprintf(STDERR, "**** Unable to proceed with file-level document-version history $inexport_mode => Stopping\n"); 676 677 exit $!; 678 } 607 679 } 608 680 … … 627 699 my $arcinfo_src_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-src", $archivedir, $perform_firsttime_init); 628 700 701 629 702 my $archive_info = new arcinfo ($collectcfg->{'infodbtype'}); 630 $archive_info->load_info ($arcinfo_doc_filename); 631 # load in rev info so we don't overwrite existing info when we do incremental import 632 # from here on, make all changes to this object, then write out the file at the end. 
703 $archive_info->load_info($arcinfo_doc_filename); 704 # Load in reverse-lookup info (used to determine the docs that a file in import is used in), 705 # so we don't overwrite existing info when we do incremental import 706 # From here on, make all changes to this object, then write out the file at the end. 633 707 $archive_info->load_rev_info($arcinfo_src_filename); 634 708 … … 710 784 $block_hash->{'new_files'} = {}; 711 785 $block_hash->{'reindex_files'} = {}; 712 # all of these are set somewhere else, so it's more readable to define them 713 # here [jmt12] 786 787 # all of these are set somewhere else, so it's more readable to define them here [jmt12] 714 788 $block_hash->{'all_files'} = {}; 715 789 $block_hash->{'deleted_files'} = {}; … … 829 903 830 904 my $arcinfo_src_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-src", $archivedir); 905 831 906 # need to check this file exists before trying to read it - in the past 832 907 # it wasn't possible to have a manifest unless keepold was also set so … … 902 977 # Can now work out which files were new, already existed, and have 903 978 # been deleted 904 979 905 980 new_vs_old_import_diff($archive_info,$block_hash,$importdir, 906 981 $archivedir,$verbosity,$incremental_mode); … … 1007 1082 if ($self->{'OIDtype'} eq 'incremental') 1008 1083 { 1009 store_doc_oid_count($archivedir); 1084 store_doc_oid_count($archivedir); 1010 1085 } 1011 1086 … … 1014 1089 $processor->end(); 1090 1016 # if ($inexport_mode eq "import") { 1017 1091 if ($self->{'generate_auxiliary_files'}) { 1092 1018 1093 # write out the archive information file 1019 1094 # for backwards compatibility with archives.inf file 1020 1095 if ($arcinfo_doc_filename =~ m/(contents)|(\.inf)$/) { 1096 # In the days of this being a text file, this is all we had to do 1097 # Note, if still using this form of archive-inf, then neither 1098 # incremental building nor file-level document-version history 1099 # is supported 1021 
1100 $archive_info->save_info($arcinfo_doc_filename); 1022 1101 } … … 1025 1104 } 1026 1105 } 1106 1107 1108 # 1109 # Now deal with any file-level document-version history (fldv-history) 1110 # 1111 1112 if ($keepold || $removeold) { 1113 1114 &DocHistoryFileUtils::archivedir_keepold_to_archivedir($collectcfg, $keepold, $replaceold, $incremental_mode, $archive_info, $archivedir,$archivedir_keepold); 1115 1116 } 1117 1118 1027 1119 return $pluginfo; 1028 1120 } … … 1050 1142 elsif ($manifest eq '' || $self->{'manifest_version'} == 1) 1051 1143 { 1144 #print STDERR "**** perform_process_files(): importdir=$importdir\n"; 1145 #print STDERR "**** block_hash:\n ", join("\n ", keys %{$block_hash}), "\n\n"; 1146 #print STDERR "**** block_hash->all_files:\n ", join("\n ", keys %{$block_hash->{'all_files'}}), "\n\n"; 1147 #print STDERR "**** block_hash->reindex_files:\n ", join("\n ", keys %{$block_hash->{'reindex_files'}}), "\n\n"; 1148 1149 #print STDERR "**** block_hash->existing_files:\n ", join("\n ", keys %{$block_hash->{'existing_files'}}), "\n\n"; 1150 #print STDERR "**** block_hash->file_blocks:\n ", join("\n ", keys %{$block_hash->{'file_blocks'}}), "\n\n"; 1151 1052 1152 &plugin::read ($pluginfo, $importdir, '', $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 1053 1153 } … … 1197 1297 1198 1298 my $prev_all_files = $archive_info->{'prev_import_filelist'}; 1299 1199 1300 my $full_prev_all_files = {}; 1200 1301 … … 1228 1329 } 1229 1330 1331 ###print STDERR "*** new vs old: look to see if full_curr_file=$full_curr_file in full_prev_all_files hashmap\n"; 1332 1230 1333 # figure out if new file or not 1231 1334 if (defined $full_prev_all_files->{$full_curr_file}) { … … 1335 1438 #$existing_file =~ s/^$collectdir_resafe(\\|\/)?//; 1336 1439 1337 print STDERR "**** Reindexing existing file: $existing_file\n";1440 # print STDERR "**** Reindexing existing file: $existing_file\n"; 1338 1441 1339 1442 push(@$reindex_files,$existing_file); -
main/trunk/greenstone2/perllib/scriptutil.pm
r20646 r37152 32 32 use gsprintf 'gsprintf'; 33 33 34 # returns $removeold, $keepold34 # returns ($removeold, $keepold, $incremental, $incremental_mode) 35 35 sub check_removeold_and_keepold { 36 36 37 37 my ($removeold, $keepold, $incremental, $dir, $collectcfg) = @_; 38 38 39 if (($keepold && $removeold) || ($incremental && $removeold) ) { 40 gsprintf(STDERR, "{scripts.both_old_options}\n", $dir); 39 if ($keepold && $removeold) { 40 gsprintf(STDERR, "{scripts.only_one_old_option}\n"); 41 exit(2) 42 } 43 44 if ($incremental && $removeold) { 45 gsprintf(STDERR, "{scripts.inc_remove_conflict}\n", $dir); 41 46 sleep(3); #just in case 42 47 return (1,0,0,"none"); … … 44 49 } 45 50 51 46 52 # Incremental mode may be set to "none", "onlyadd" or "all" 47 53 # depending on status of -keepold and -incremental flags … … 80 86 } 81 87 88 89 # returns ($removeold, $keepold, $replaceold, $incremental, $incremental_mode) 90 sub check_removeold_keepold_replaceold { 91 92 my ($removeold, $keepold, $replaceold, $incremental, $dir, $collectcfg) = @_; 93 94 my $old_count = 0; 95 $old_count++ if $removeold; 96 $old_count++ if $keepold; 97 $old_count++ if $replaceold; 98 99 if ($old_count>1) { 100 gsprintf(STDERR, "{scripts.only_one_old_option}\n"); 101 exit(2); 102 } 103 104 if (($incremental && $removeold) ) { 105 gsprintf(STDERR, "{scripts.inc_remove_conflict}\n", $dir); 106 sleep(5); #just in case 107 return (1,0,0,0,"none"); 108 } 109 110 # Determine what the internal 'incremental_mode' is: 111 # => May be set to "none", "onlyadd" or "all" 112 # Based on status of (-keepold|-removeold) and -incremental flags 113 # 114 # With the introduction of file-level document-version (fldv) history, the chosen name 'onlyadd' 115 # for when '-keepold' is on is a bit misleading. However, it does still get us "over the line" 116 # in terms of how it functionally operates. 
In the case where pre-existing content is 117 # still in the 'import' folder, then when everything in 'archives_keepold' gets copied 118 # back, any pre-existing documents from import (which will have resulted in a doc folder 119 # in 'archives') will trigger a file-level document-version history folder inside it. For any 120 # content that was new in 'import', it won't have a pre-existing folder inside 'archives' 121 # and so will appear as a new folder with *no* file-level document-version history folder 122 # inside it (effectively why the keepold incremental mode was originally called 'onlyadd'). 123 124 my $incremental_mode = "none"; 125 if ($incremental) { 126 $incremental_mode = "all"; 127 } elsif ($keepold || $replaceold) { 128 $incremental_mode = "onlyadd"; 129 } 130 131 if (!$keepold && !$removeold && !$replaceold && !$incremental && defined $collectcfg) { 132 # we only look at config file options if we don't have these on the command line 133 if (defined $collectcfg->{'removeold'} && $collectcfg->{'removeold'} =~ /^true$/i ) { 134 $removeold = 1; 135 } elsif (defined $collectcfg->{'keepold'} && $collectcfg->{'keepold'} =~ /^true$/i) { 136 $keepold = 1; 137 $incremental_mode = "onlyadd"; 138 } elsif (defined $collectcfg->{'replaceold'} && $collectcfg->{'replaceold'} =~ /^true$/i) { 139 $replaceold = 1; 140 $incremental_mode = "onlyadd"; 141 } elsif (defined $collectcfg->{'incremental'} && $collectcfg->{'incremental'} =~ /^true$/i) { 142 $incremental = 1; 143 $incremental_mode = "all"; 144 } 145 146 # Go through the same checks as before 147 my $cfg_old_count = 0; 148 $cfg_old_count++ if $removeold; 149 $cfg_old_count++ if $keepold; 150 $cfg_old_count++ if $replaceold; 151 152 if ($cfg_old_count>1) { 153 gsprintf(STDERR, "{scripts.only_one_old_option}\n"); 154 exit(2); 155 } 156 157 if (($incremental && $removeold) ) { 158 gsprintf(STDERR, "{scripts.inc_remove_conflict}\n", $dir); 159 sleep(5); #just in case 160 return (1,0,0,0,"none"); 161 } 162 } 163 164 
# default to -removeold if nothing specified 165 if (!$keepold && !$removeold && !$replaceold && !$incremental) { 166 gsprintf(STDERR, "{scripts.no_old_options} \n", $dir); 167 sleep(5); #just in case 168 return (1,0,0,0,"none"); 169 } 170 171 # incremental implies keepold 172 if ($incremental) { 173 $keepold = 1; 174 } 175 return ($removeold, $keepold, $replaceold, $incremental, $incremental_mode); 176 177 } 178 179 180 82 181 1; -
main/trunk/greenstone2/perllib/strings.properties
r37047 r37152 62 62 63 63 scripts.both_old_options:WARNING: -removeold was specified with -keepold or -incremental, defaulting to -removeold. Current contents of %s directory will be deleted. 64 65 scripts.inc_remove_conflict:WARNING: -incremental and -removeold were specified. Defaulting to -removeold. Current contents of %s directory will be deleted. 66 67 scripts.only_one_old_option:Error: conflicting 'old' options: can only specify one of -removeold, -keepold, -replaceold. Exiting. 64 68 65 69 scripts.no_old_options:WARNING: None of -removeold, -keepold or -incremental were specified, defaulting to -removeold. Current contents of %s directory will be deleted.
Note:
See TracChangeset
for help on using the changeset viewer.