Changeset 37194


Ignore:
Timestamp:
2023-01-28T19:01:43+13:00 (15 months ago)
Author:
davidb
Message:

Tested version of file-level document-version history (fldv-history) using the newer approach: mv archives to archives_keepold, and then hardlink-copy back to archives. Uses the newer technique of storing the timestamp in archives-timestamp.out, rather than relying on the timestamp of the file (which now changes due to the copy step).

Location:
main/trunk/greenstone2/perllib
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/DocHistoryFileUtils.pm

    r37187 r37194  
    379379
    380380
     381sub doc_copy_archivedir_keepold_to_archivedir
     382{
     383
     384    my ($keepold_doc_dirname,$doc_dirname) = @_;
     385   
     386    my $status_ok = &FileUtils::hardlinkFilesRefRecursive([$keepold_doc_dirname],$doc_dirname, { 'strict' => 1 } );
     387   
     388    if (!$status_ok) {
     389    print STDERR "Error: Failed to recursively hardlink copy the top-level 'keepold_archives' doc to 'archives'\n";
     390    }   
     391
     392    if (!$status_ok) {
     393    print STDERR "\n";
     394    print STDERR "**** A critical error occurred in creating/updating file-level document-version history\n";
     395    print STDERR "**** After determining and correcting the cause of the error, to reset, delete\n";
     396    print STDERR "****     your 'archives' folder, and move 'archives_keep' back to 'archives'\n";
     397    print STDERR "\n";
     398    exit 1;
     399    }       
     400}
     401
    381402
    382403sub archivedir_keepold_to_archivedir
     
    385406
    386407    # Action Step 5 (from inexport.pm)
    387    
    388     # 5.1  a keepold doc's '_fldv_history' goes first
    389     # 5.2  then the keepold doc's top-level content for new 'nminus 1'
     408
     409    # If needing to turn content in 'archives_keep' into a doc's 'archive' fldv_history/ directory, then
     410    #   5.1  a keepold doc's '_fldv_history' goes first
     411    #   5.2  then the keepold doc's top-level content for new 'nminus 1'
    390412   
    391413    my $perform_firsttime_init = 1;
     
    446468        # **** Need additional work if -groupsize option > 1 is to be supported!
    447469        # **** otherwise need to have earlier test to prevent -groupsize > 1 from been used with fldv-history
    448        
     470
     471        my $keepold_doc_filename = &FileUtils::filenameConcatenate($archivedir_keepold, $keepold_doc_file);
     472        my $keepold_doc_dirname  = &util::get_parent_folder($keepold_doc_filename);
     473
     474        my $doc_filename = &FileUtils::filenameConcatenate($archivedir, $doc_file);
     475        my $doc_dirname  = &util::get_parent_folder($doc_filename);
     476
    449477        if ($index_status eq "D") {
    450478        #
    451479        # (1) if archive_info entry marked for deletion (D)
    452         #           
     480        #
     481
     482        # Even though delete is the ultimate outcome for this doc/dir, due to moving 'archives' to 'archives_keep' at the very start of import.pl
     483        # we need to recursively hardlink copy it back, so the rest of the incremental import.pl process can work as expected
     484        doc_copy_archivedir_keepold_to_archivedir($keepold_doc_dirname,$doc_dirname);
     485       
    453486        print STDERR "  The latest version of $keepold_OID is marked for deletion\n";
    454487        print STDERR "  => nothing to do right now (will be deleted when buildcol.pl run)\n";           
    455488        }
    456489        else {
    457         my $keepold_doc_filename = &FileUtils::filenameConcatenate($archivedir_keepold, $keepold_doc_file);
    458         my $keepold_doc_dirname  = &util::get_parent_folder($keepold_doc_filename);
    459 
    460         my $doc_filename = &FileUtils::filenameConcatenate($archivedir, $doc_file);
    461         my $doc_dirname = &util::get_parent_folder($doc_filename);
    462        
    463490        if ($keepold) {
    464491            #
     
    482509            }
    483510            elsif ($index_status eq "B") {
     511            # This doc's archive doc folder hasn't change, but due to moving 'archives' to 'archives_keep' at the very start of import.pl
     512            # we need to recursively hardlink copy it back
     513           
     514            doc_copy_archivedir_keepold_to_archivedir($keepold_doc_dirname,$doc_dirname);
     515           
    484516            if ($incremental_mode ne "all") {
    485517                print STDERR "  Unchanged version of document in 'archives/' (Index-Status=B)\n";
  • main/trunk/greenstone2/perllib/FileUtils.pm

    r37187 r37194  
    3333
    3434use FileHandle;
     35use File::stat;
    3536
    3637# Greenstone modules
     
    7273
    7374#canRead
     75#getTimestamp
    7476#isSymbolicLink
    7577#modificationTime
     
    106108
    107109
     110## @function getTimestamp()
     111#
     112sub getTimestamp
     113{
     114    my ($filename) = @_;
     115   
     116    my $file_stat = stat($filename);
     117    my $mtime = $file_stat->mtime;
     118
     119    return $mtime;
     120}
     121## getTimestamp()
     122   
    108123## @function closeFileHandle
    109124#
  • main/trunk/greenstone2/perllib/arcinfo.pm

    r37187 r37194  
    123123
    124124    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
    125     print STDERR "\n\n!!!!!!!!!!!! load_info() -- Calling method: $cfilename:$cline $cpackage->$csubr\n\n";
    126125   
    127126    $self->{'info'} = {};
     
    361360    my $self = shift (@_);
    362361    my ($arcinfo_doc_filename) = @_;
    363     print STDERR "\n\n!!!!!!!!!! SAVE save_arcinfo_doc_timestamp()\n\n";
    364362   
    365363    my $arcinfo_timestamp_filename = $self->get_timestamp_filename($arcinfo_doc_filename);
  • main/trunk/greenstone2/perllib/inexport.pm

    r37187 r37194  
    715715    }
    716716
    717     print STDERR "\n\n\n";
    718     print STDERR "*****!!!!! No easy way in perl to perform file copy and perserve timestamps\n";
    719     print STDERR "*****!!!!! SO => need to implement file with timestamp within (and DBinfo type for good measure)\n";
    720     print STDERR "*****!!!!!       and change plugin/incremental building that depends on/uses -M \n";
    721     # ArchiveInfoPlugin, DirectoryPlugin inexport.pm, arcinfo.pma
    722     # DirectoryPlugin inexport.pm, arcinfo.pm (convertutil.pm OK, as working on the two files passed to it)
    723     print STDERR "\n\n\n";
    724     }
    725 
    726 
     717    if ($self->{'groupsize'} > 1) {
     718        print STDERR "\n";
     719        print STDERR "******\n";
     720        print STDERR "Warning: Minus option '-groupsize' has not been tested with file-level document version history!\n";
     721        print STDERR "   If the groups formed between subsequent invocations of import.pl stay the same, then\n";
     722        print STDERR "   the formation of file-level document-version history 'nminus-<n> bundles' in _fldv_history directories\n";
     723        print STDERR "   should remain correct\n";
     724        print STDERR "******\n";
     725        print STDERR "\n";
     726    }
     727    }
     728
     729   
    727730    # Read the archive information file
    728731    # coincidentally fldv-history: Action Step 4
     
    13381341    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archivedir);
    13391342
    1340     #my $archiveinf_timestamp = -M $arcinfo_doc_filename;
    13411343    my ($unused_infodbtype,$archiveinf_timestamp) = $archive_info->load_timestamp($arcinfo_doc_filename);
    1342 
     1344   
    13431345    # First convert all files to absolute form
    13441346    # This is to support the situation where the import folder is not
     
    13871389        if ($block_hash->{'metadata_files'}->{$full_curr_file}) {
    13881390        # is it modified??
    1389         if (-M $full_curr_file < $archiveinf_timestamp) {
     1391        my $full_curr_file_timestamp = &FileUtils::getTimestamp($full_curr_file);
     1392
     1393        if ($full_curr_file_timestamp > $archiveinf_timestamp) {
    13901394            print STDERR "*** Detected a *modified metadata* file: $full_curr_file\n" if $verbosity >= 2;
    13911395            # its newer than last build
     
    14781482
    14791483    foreach my $existing_filename (@existing_files) {
    1480     if (-M $existing_filename < $archiveinf_timestamp) {
     1484    my $existing_filename_timestamp = &FileUtils::getTimestamp($existing_filename);
     1485    if ($existing_filename_timestamp > $archiveinf_timestamp) {
    14811486        # file is newer than last build
    14821487       
  • main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm

    r37187 r37194  
    141141        my $archive_info = new arcinfo($infodbtype);
    142142        my ($unused_infodbtype,$archiveinf_timestamp) = $archive_info->load_timestamp($arcinfo_doc_filename);
    143        
    144         # $self->{'inf_timestamp'} = -M $archives_inf;
    145143        $self->{'inf_timestamp'} = $archiveinf_timestamp;
    146144    }
Note: See TracChangeset for help on using the changeset viewer.