Ignore:
Timestamp:
2018-11-06T19:26:08+13:00 (5 years ago)
Author:
ak19
Message:

Optimising. The gssql class internally has only one shared connection to the db, making the connection only the first time and disconnecting only when the last gssql is finished(). For keywords: this is implemented using the singleton coding (anti-) pattern. Now each perl process (import or buildcol) will connect to the SQL DB only once, not twice during import where it used to be once for GS SQL plugout and once for GSSQL plugin.

Location:
main/trunk/greenstone2/perllib
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/gssql.pm

    r32576 r32578  
    3333use DBI; # the central package for this module used by GreenstoneSQL Plugout and Plugin
    3434
     35# singleton connection
     36my $_dbh_instance = undef; # calls undef() function. See https://perlmaven.com/undef-and-defined-in-perl
     37my $ref_count = 0;
     38
    3539# Need params_map keys:
    3640# - collection_name
     
    4953# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
    5054
     55# TODO: remove unnecessary warn() since PrintError is active
     56# https://perldoc.perl.org/perlobj.html#Destructors
     57
     58# TODO: drop table if exists and create table if exists are available in MySQL. Use those cmds
     59# instead of always first checking for existence ourselves?
     60
    5161sub new
    5262
     
    8090    return $self;
    8191}
     92
    8293
    8394
     
    109120# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
    110121
     122
     123
     124# SINGLETON / GET INSTANCE PATTERN
     125# https://stackoverflow.com/questions/16655603/perl-objects-class-variable-initialization
     126# https://stackoverflow.com/questions/7587157/how-can-i-set-a-static-variable-that-can-be-accessed-by-all-subclasses-of-the-sa
     127# Singleton without Moose: https://www.perl.com/article/52/2013/12/11/Implementing-the-singleton-pattern-in-Perl/
     128
     129sub connect_to_db
     130{
     131    my $self= shift (@_);
     132    my ($params_map) = @_;
     133   
     134    $params_map->{'db_encoding'} = $self->{'db_encoding'};
     135    $params_map->{'verbosity'} = $self->{'verbosity'};
     136   
     137    $self->{'db_handle'} = &_get_connection_instance($params_map); # getting singleton (class method)
     138    if($self->{'db_handle'}) {
     139    $ref_count++; # if successful, keep track of the number of refs to the single db connection
     140    return $self->{'db_handle'};
     141    }
     142    return undef;
     143}
     144
     145# SINGLETON METHOD #
    111146# TODO: where should the defaults for these params be, here or in GS-SQLPlugin/Plugout?
    112 sub connect_to_db {
    113     my $self= shift (@_);
     147sub _get_connection_instance
     148{
     149    #my $self= shift (@_); # singleton method doesn't use self, but callers don't need to know that
    114150    my ($params_map) = @_;
     151
     152    return $_dbh_instance if($_dbh_instance);
     153
     154    # or make the connection
     155   
    115156    # For proper utf8 support in MySQL, encoding should be 'utf8mb4' as 'utf8' is insufficient
    116     my $db_enc = "utf8mb4" if $self->{'db_encoding'} eq "utf8";
     157    my $db_enc = "utf8mb4" if $params_map->{'db_encoding'} eq "utf8";
    117158
    118159    # these are the params for connecting to MySQL
     
    130171    my $connect_str = "dbi:$db_driver:host=$db_host"; # don't provide db - allows checking the db exists later when loading the db
    131172
    132     if($self->{'verbosity'}) {
     173    if($params_map->{'verbosity'}) {
    133174    print STDERR "Away to make connection to $db_driver database with:\n";
    134175    print STDERR " - hostname $db_host; username: $db_user";
     
    183224   
    184225    # if we're here, then connection succeeded, store handle
    185     $self->{'db_handle'} = $dbh;
    186     return 1;
    187 }
     226    $_dbh_instance = $dbh;
     227    return $_dbh_instance;
     228   
     229}
     230
     231# Will disconnect if this instance of gssql holds the last reference to the db connection
     232sub finished() {
     233    my $self= shift (@_);
     234
     235    $ref_count--;
     236    if($ref_count == 0) {
     237    $self->force_disconnect_from_db();
     238    }   
     239}
     240
     241# Call this method on die(), so that you're sure the perl process has disconnected from SQL db
     242# Disconnect from db - https://metacpan.org/pod/DBI#disconnect
     243# TODO: make sure to have committed or rolled back before disconnect
     244# and that you've call finish() on statement handles if any fetch remnants remain
     245sub force_disconnect_from_db {
     246    my $self= shift (@_);
     247
     248    if($_dbh_instance) {
     249    # make sure any active stmt handles are finished
     250    # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."
     251   
     252    print STDERR "    GSSQL disconnecting from database\n";
     253    # Just go through the singleton db handle to disconnect
     254    $_dbh_instance->disconnect or warn $_dbh_instance->errstr;
     255    $_dbh_instance = undef;
     256    }
     257    # Number of gssql objects that share a live connection is now 0, as the connection's dead
     258    # either because the last gssql object finished() or because connection was killed (force)
     259    $ref_count = 0;
     260}
     261
    188262
    189263# Load the designated database, i.e. 'use <dbname>;'.
     
    260334}
    261335
    262 # disconnect from db - https://metacpan.org/pod/DBI#disconnect
    263 # TODO: make sure to have committed or rolled back before disconnect
    264 # and that you've call finish() on statement handles if any fetch remnants remain
    265 sub disconnect_from_db {
    266     my $self= shift (@_);   
    267     my $dbh = $self->{'db_handle'};
    268 
    269     # make sure any active stmt handles are finished
    270     # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."
    271    
    272     print STDERR "Disconnecting from database\n" if($self->{'verbosity'} > 1);
    273    
    274     my $rc = $dbh->disconnect or warn $dbh->errstr; # The handle is of little use after disconnecting. Possibly PrintError already prints a warning and this duplicates it?
    275     return $rc;
    276 }
    277336
    278337sub create_db {
  • main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm

    r32577 r32578  
    250250    #return undef unless $self->can_process_this_file($file); # NO, DON'T DO THIS (inherited remove_one behaviour) HERE:
    251251           # WE DON'T CARE IF IT'S AN IMAGE FILE THAT WAS DELETED.
    252            # WE CARE ABOUT REMOVING THE DOCOID OF THAT IMAGE FILE FROM THE SQL DB
     252           # WE CARE ABOUT REMOVING THE DOC_OID OF THAT IMAGE FILE FROM THE SQL DB
    253253           # SO DON'T RETURN IF CAN'T_PROCESS_THIS_FILE
    254254   
     
    430430               });
    431431
    432     # try connecting to the mysql db, if that fails it will die
     432    # try connecting to the mysql db, die if that fails
    433433    if(!$gs_sql->connect_to_db({
    434434    'db_driver' => $self->{'db_driver'},
     
    452452    # This is fatal for the plugout, let's terminate here after disconnecting again
    453453    # PrintError would already have displayed the warning message on load fail
    454     $gs_sql->disconnect_from_db()
    455         || warn("Unable to disconnect from database.\n");
     454    $gs_sql->force_disconnect_from_db();
    456455    die("Could not use db $db_name. Can't proceed.\n");
    457456    }
     
    475474    if($self->{'gs_sql'}) { # only want to work with sql db if buildcol.pl, gs_sql won't have
    476475    # a value except during buildcol, so when processor =~ m/buildproc$/.
    477     $self->{'gs_sql'}->disconnect_from_db()
    478         || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n");
    479 
    480     # explicitly delete gs_sql key (setting key to undef has a different meaning from deleting)
    481     # so all future use has to make the connection again
     476    $self->{'gs_sql'}->finished();
     477
     478    # Clear gs_sql (setting key to undef has a different meaning from deleting:
     479    # undef makes key still exist but its value is unded whereas delete deletes the key)
     480    # So all future use has to make the connection again
    482481    delete $self->{'gs_sql'};
    483482    }
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm

    r32573 r32578  
    143143    my $db_params = {
    144144    'collection_name' => $ENV{'GSDLCOLLECTION'},
    145     'verbosity' => 1   
     145    'verbosity' => 1
    146146    };
    147147
    148148    my $gs_sql = new gssql($db_params);
    149149   
    150     # try connecting to the mysql db, if that fails it will die
    151     # so don't bother preparing GreenstoneXMLPlugout by calling superclass' begin()
     150    # try connecting to the mysql db, die if that fails
     151    # So don't bother preparing GreenstoneXMLPlugout by calling superclass' begin() yet
    152152    if(!$gs_sql->connect_to_db({
    153153    'db_driver' => $self->{'db_driver'},
     
    181181    # This is fatal for the plugout, let's terminate here after disconnecting again
    182182    # PrintError would already have displayed the warning message on load fail
    183     $gs_sql->disconnect_from_db()
    184         || warn("Unable to disconnect from database.\n");
     183    $gs_sql->force_disconnect_from_db(); # disconnect_from_db() will issue a warning on error
    185184    die("Could not use db $db_name and/or prepare its tables. Can't proceed.\n");
    186185    }
     
    205204    $self->SUPER::end(@_);   
    206205   
    207     $self->{'gs_sql'}->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); # disconnect_from_db() will also issue a warning, but this may be clearer
     206    $self->{'gs_sql'}->finished(); # will disconnect from db if last instance
     207    delete $self->{'gs_sql'}; # key gs_sql no longer exists, not just the value being undef
    208208}
    209209
Note: See TracChangeset for help on using the changeset viewer.