Changeset 33392


Ignore:
Timestamp:
2019-08-08T15:15:11+12:00 (5 years ago)
Author:
ak19
Message:

Kathy found a problem whereby she wanted to run consecutive buildcols without activate on a solr collection. She experienced file locking issues on Windows, which the original solr related building code would inevitably cause without activate. Dr Bainbridge's solution was to change our way of thinking about what activate and buildcol should now be doing as regards solr collections. The solution was to unload building-cores for indexes at the end of buildcol, instead of only doing this during activate.pl. I've tried to be conservative with the changes made to the existing code, so that activate still attempts to also unload building-cores, but first pings them (and any other cores it attempts to unload) to ensure the cores exist. During buildcol too, the building-cores are pinged to check they exist before we attempt to unload them.

Files:
3 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/solr/trunk/src/perllib/solrbuilder.pm

    r33372 r33392  
    532532    }
    533533    else {
    534         # if collect==core already in solr.xml (check with STATUS)
    535         # => use RELOAD call to refresh fields now expressed in schema.xml
    536         #
     534        # if collect==core is already in solr.xml (check with STATUS)
     535        # => use RELOAD* call to refresh fields now expressed in schema.xml
     536        # 
    537537        # else
    538538        # => use CREATE API to add to solr.xml
     539        #
     540        # No longer calling RELOAD, because Georgy documented a memory leak with it (svn r32178)
     541        # Using unload + create to get the same effect as RELOAD without its side-effects.
     542        #
    539543       
    540544        my $check_core_exists = $solr_server->admin_ping_core($core);
     
    545549        }
    546550       
    547         print $outhandle "Creating Solr core: $core\n";
    548         $solr_server->admin_create_core($core);
     551        print $outhandle "Creating Solr core: $core\n";
     552        $solr_server->admin_create_core($core);
    549553       
    550554    }
     
    690694    print STDERR "</Stage>\n" if $self->{'gli'};
    691695
     696
     697    print STDERR "@@@@@ FINISHED PROCESSING INDEX: indexlevel $self->{'index_mapping'}->{$index}\n\n";
     698
    692699}
    693700
     
    700707    # as this has been done in our pre_build_indexes() phase for solr
    701708
    702 
     709     
     710    my $solr_server = $self->{'solr_server'};
     711
     712    # 1 Aug 2019: now we unload (remove) building-cores for each index during buildcol itself
     713    # instead of during activate.
     714    # Kathy described a problem that when calling buildcol.pl successively without following each
     715    # with a call to activate, there were Win file lock issues when attempting to manually remove
     716    # the building folder. This was what activate was to solve, however, there's no reason to
     717    # call activate after buildcol in such cases where it is known the buildcol failed in some way.
     718    # (In such cases, the user building the collection would have to manually unload the building-
     719    # cores through the solr servlet interface).
     720    # Dr Bainbridge instructed that the building- cores should be unloaded again at the end
     721    # of buildcol. And any symmetrical step during pre-building, if any is found necessary.
     722    # I'm still not sure this won't break activate in some way, for some combination,
     723    # as that is meant to ensure building-cores exist whenever the building folder exists...
     724    # But I was asked not to take too long on this, so I can't test all the different combinations
     725    # (removeold/incremental/..., or even remote GS situation) in which building can happen and in
     726    # which buildcol can be combined or not with activate, or be sequenced with further calls to
     727    # buildcol, with or without -activate.
     728    # So to compensate, I've tried to keep the code changes as conservative as possible to keep the chances
     729    # for things going wrong to a minimum, by pinging for building-* cores before unloading them here
     730    # in solrbuilder.pm (note that unload doesn't delete the index directory associated with the core)
     731    # and then in activate.pl the building-* cores get pinged again to determine whether they exist
     732    # before attempting to unload them there as well, since I can no longer assume the cores exist
     733    # and can be unloaded. There is now the additional overhead of all the extra pinging going on,
     734    # but it helps ensure we only unload building-* cores when they exist.
     735
     736    # Note that pre_build_indexes() was already creating the building- cores, so don't need to
     737    # worry about the needed symmetry at start and end of buildcol to create building- cores
     738    # in symmetry with unloading them here.
     739   
     740    my $site        = $self->{'site'};
     741    my $collect     = $self->{'collection'};
     742    my $core_prefix = (defined $site) ? "$site-$collect" : $collect;   
     743    my $build_dir = $self->{'build_dir'};
     744    my $idx = "idx";
     745   
     746    foreach my $level (keys %{$self->{'levels'}}) {
     747   
     748    my ($pindex) = $level =~ /^(.)/;
     749
     750    my $index_dir = $pindex.$idx;
     751    my $corename = "building-$core_prefix-$index_dir";
     752
     753    # unload the core if it exists. Since I can't think of exactly in which cases
     754    # the building-cores exist and won't exist (e.g. removeold), I'll just always
     755    # first check if the building-core exists and then unload it.
     756    if ($solr_server->admin_ping_core($corename)) {
     757        print STDERR "@@@@ solrbuilder::post_build_indexes(): Now unloading this index's building core: $corename\n\n";
     758        $solr_server->admin_unload_core_explicitly_retaining_index($corename)
     759    }
     760
     761    }
     762   
    703763    # Also need to stop the Solr server (be it tomcat or jetty) if it was explicitly started
    704764    # in pre_build_indexes()
    705    
    706     my $solr_server = $self->{'solr_server'};
    707765
    708766    if ($solr_server->explicitly_started()) {
  • gs3-extensions/solr/trunk/src/perllib/solrserver.pm

    r33315 r33392  
    407407}
    408408
     409sub admin_unload_core_explicitly_retaining_index
     410{
     411    # For UNLOAD core params, see page 315 of
     412    #    https://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf
     413   
     414    my $self = shift @_;
     415    my ($core) = @_;
     416   
     417    # Don't delete index (sidx/didx folder) along with unloading core, so force 0 as parameter
     418    # (though not deleting the index is the default behaviour of admin_unload_core() anyway,
     419    # since activate is meant to manually take care of deleting the index folder and moving the
     420    # building folder to replace index, instead of activate asking unload_core to delete the
     421    # index folder).
     422    # But this function's very particular behaviour may be crucial for other instances such as
     423    # its use in solrbuilder::post_build_indexes(), so even if admin_unload_core() could
     424    # conceivably be changed to delete the index by default, this method would still do the
     425    # right thing when called by solrbuilder::post_build_indexes().
     426    $self->admin_unload_core($core, 0);
     427}
     428
     429
    409430sub start
    410431{
  • main/trunk/greenstone2/bin/script/activate.pl

    r32178 r33392  
    277277   
    278278    # If the Solr/Jetty server is not already running, the following starts
    279     # it up, and only returns when the server is "reading and listening"   
     279    # it up, and only returns when the server is "ready and listening" 
    280280    $solr_server = new solrserver($build_dir);
    281281    $solr_server->start();
    282282   
    283     # We'll be moving building to index. For solr collection, there's further
     283    # We'll be moving building to index. For a solr collection, there's further
    284284    # special processing to make a corresponding change to the solr.xml
    285285    # by removing the temporary building cores and (re)creating the index cores
     
    307307        # if solr, remove any cores that are using the index_dir before deleting this dir
    308308        foreach my $corename (@corenames) {
    309             $solr_server->admin_unload_core($corename);
     309            $solr_server->admin_unload_core($corename) if ($solr_server->admin_ping_core($corename));
    310310        }
    311311        }   
     
    327327        # if solr, remove any cores that are using the building_dir before moving this dir onto index
    328328        foreach my $corename (@corenames) {
    329         $solr_server->admin_unload_core("building-$corename");
     329        $solr_server->admin_unload_core("building-$corename") if ($solr_server->admin_ping_core("building-$corename"));
    330330        }
    331331    }
     
    376376            $solr_server->admin_unload_core($corename);
    377377        }
    378             $solr_server->admin_create_core($corename, $index_dir);
     378        $solr_server->admin_create_core($corename, $index_dir);
    379379       
    380380        }
Note: See TracChangeset for help on using the changeset viewer.